From 6fb00d4e94bc28c39fa077b03e6531956de87802 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Mon, 1 Jun 2020 18:42:51 -0700 Subject: Bluetooth: Check scan state before disabling during suspend Check current scan state by checking HCI_LE_SCAN flag and send scan disable command only if scan is already enabled. Signed-off-by: Manish Mandlik Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1fc55685da62..1acf5b8e0910 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -998,8 +998,9 @@ static void hci_req_set_event_filter(struct hci_request *req) static void hci_req_config_le_suspend_scan(struct hci_request *req) { - /* Can't change params without disabling first */ - hci_req_add_le_scan_disable(req); + /* Before changing params disable scan if enabled */ + if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); /* Configure params and enable scanning */ hci_req_add_le_passive_scan(req); @@ -1065,8 +1066,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) page_scan = SCAN_DISABLED; hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan); - /* Disable LE passive scan */ - hci_req_add_le_scan_disable(&req); + /* Disable LE passive scan if enabled */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(&req); /* Mark task needing completion */ set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); -- cgit From 30965242268c39eaaa4e845e0559cd2635d016a6 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 1 Jun 2020 14:20:59 +0000 Subject: Bluetooth: Removing noisy dbg message This patch removes a particularly noisy dbg message. The debug message isn't particularly interesting for debuggability so it was simply removed to reduce noise in dbg logs. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 3fd124927d4d..b751a7c1b20f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -453,8 +453,6 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; __poll_t mask = 0; - BT_DBG("sock %p, sk %p", sock, sk); - poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) -- cgit From a9ec8423134a54c9f0ae8d4ef59e1e833ca917c2 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Fri, 5 Jun 2020 13:50:15 -0700 Subject: Bluetooth: Allow suspend even when preparation has failed It is preferable to allow suspend even when Bluetooth has problems preparing for sleep. When Bluetooth fails to finish preparing for suspend, log the error and allow the suspend notifier to continue instead. To also make it clearer why suspend failed, change bt_dev_dbg to bt_dev_err when handling the suspend timeout. Fixes: dd522a7429b07e ("Bluetooth: Handle LE devices during suspend") Reported-by: Len Brown Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index dbe2d79f233f..83ce665d3cbf 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3289,10 +3289,10 @@ static int hci_suspend_wait_event(struct hci_dev *hdev) WAKE_COND, SUSPEND_NOTIFIER_TIMEOUT); if (ret == 0) { - bt_dev_dbg(hdev, "Timed out waiting for suspend"); + bt_dev_err(hdev, "Timed out waiting for suspend events"); for (i = 0; i < __SUSPEND_NUM_TASKS; ++i) { if (test_bit(i, hdev->suspend_tasks)) - bt_dev_dbg(hdev, "Bit %d is set", i); + bt_dev_err(hdev, "Suspend timeout bit: %d", i); clear_bit(i, hdev->suspend_tasks); } @@ -3360,12 +3360,15 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, ret = hci_change_suspend_state(hdev, BT_RUNNING); } - /* If suspend failed, restore it to running */ - if (ret && action == PM_SUSPEND_PREPARE) - hci_change_suspend_state(hdev, BT_RUNNING); - done: - return ret ? notifier_from_errno(-EBUSY) : NOTIFY_STOP; + /* We always allow suspend even if suspend preparation failed and + * attempt to recover in resume. + */ + if (ret) + bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", + action, ret); + + return NOTIFY_STOP; } /* Alloc HCI device */ -- cgit From 00398e1d518309328e8ba7dff00881538ac22c6a Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 19:50:41 +0000 Subject: Bluetooth: Add support for BT_PKT_STATUS CMSG data for SCO connections This change adds support for reporting the BT_PKT_STATUS to the socket CMSG data to allow the implementation of a packet loss correction on erroneous data received on the SCO socket. The patch was partially developed by Marcel Holtmann and validated by Hsin-yu Chao. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 3 +++ net/bluetooth/hci_core.c | 1 + net/bluetooth/sco.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b751a7c1b20f..4ef6a54403aa 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -286,6 +286,9 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); + + if (bt_sk(sk)->skb_put_cmsg) + bt_sk(sk)->skb_put_cmsg(skb, msg, sk); } skb_free_datagram(sk, skb); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 83ce665d3cbf..00458a8c26f8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4554,6 +4554,7 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) if (conn) { /* Send to upper protocol */ + bt_cb(skb)->sco.pkt_status = flags & 0x03; sco_recv_scodata(conn, skb); return; } else { diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c8c3d38cdc7b..83a48860bb5d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -66,6 +66,7 @@ struct sco_pinfo { bdaddr_t dst; __u32 flags; __u16 setting; + __u8 cmsg_mask; struct sco_conn *conn; }; @@ -449,6 +450,15 @@ static void sco_sock_close(struct sock *sk) sco_sock_kill(sk); } +static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg, + struct sock *sk) +{ + if (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS) + put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS, + sizeof(bt_cb(skb)->sco.pkt_status), + &bt_cb(skb)->sco.pkt_status); +} + static void sco_sock_init(struct sock *sk, struct sock *parent) { BT_DBG("sk %p", sk); @@ -457,6 +467,8 @@ static void sco_sock_init(struct sock *sk, struct sock *parent) sk->sk_type = parent->sk_type; bt_sk(sk)->flags = bt_sk(parent)->flags; security_sk_clone(parent, sk); + } else { + bt_sk(sk)->skb_put_cmsg = sco_skb_put_cmsg; } } @@ -846,6 +858,18 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, sco_pi(sk)->setting = voice.setting; break; + case BT_PKT_STATUS: + if (get_user(opt, (u32 __user *)optval)) { + err = -EFAULT; + break; + } + + if (opt) + sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS; + else + sco_pi(sk)->cmsg_mask &= SCO_CMSG_PKT_STATUS; + break; + default: err = -ENOPROTOOPT; break; @@ -923,6 +947,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, int len, err = 0; struct bt_voice voice; u32 phys; + int pkt_status; BT_DBG("sk %p", sk); @@ -969,6 +994,13 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, err = -EFAULT; break; + case BT_PKT_STATUS: + pkt_status = (sco_pi(sk)->cmsg_mask & SCO_CMSG_PKT_STATUS); + + if (put_user(pkt_status, (int __user *)optval)) + err = -EFAULT; + break; + default: err = -ENOPROTOOPT; break; -- cgit From 32929e1f4ad9adf71f655028e4dd5d87adb97f52 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 14:26:10 +0000 Subject: Bluetooth: Use only 8 bits for the HCI CMSG state flags This change implements suggestions from the code review of the SCO CMSG state flag patch. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index caf38a8ea6a8..d5627967fc25 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -52,7 +52,7 @@ struct hci_pinfo { struct bt_sock bt; struct hci_dev *hdev; struct hci_filter filter; - __u32 cmsg_mask; + __u8 cmsg_mask; unsigned short channel; unsigned long flags; __u32 cookie; @@ -1399,7 +1399,7 @@ done: static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { - __u32 mask = hci_pi(sk)->cmsg_mask; + __u8 mask = hci_pi(sk)->cmsg_mask; if (mask & HCI_CMSG_DIR) { int incoming = bt_cb(skb)->incoming; -- cgit From 10873f99ced274cbfc119f55e7e57a0f047a0799 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 02:01:56 +0000 Subject: Bluetooth: centralize default value initialization. This patch centralized the initialization of default parameters. This is required to allow clients to more easily customize the default system parameters. Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 14 ++++---------- net/bluetooth/hci_core.c | 14 +++++++++++++- net/bluetooth/hci_request.c | 15 +++++---------- 3 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 307800fd18e6..9bdffc4e79b0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -789,11 +789,8 @@ static void set_ext_conn_params(struct hci_conn *conn, memset(p, 0, sizeof(*p)); - /* Set window to be the same value as the interval to - * enable continuous scanning. - */ - p->scan_interval = cpu_to_le16(hdev->le_scan_interval); - p->scan_window = p->scan_interval; + p->scan_interval = cpu_to_le16(hdev->le_scan_int_connect); + p->scan_window = cpu_to_le16(hdev->le_scan_window_connect); p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval); p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval); p->conn_latency = cpu_to_le16(conn->le_conn_latency); @@ -875,11 +872,8 @@ static void hci_req_add_le_create_conn(struct hci_request *req, memset(&cp, 0, sizeof(cp)); - /* Set window to be the same value as the interval to enable - * continuous scanning. - */ - cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); - cp.scan_window = cp.scan_interval; + cp.scan_interval = cpu_to_le16(hdev->le_scan_int_connect); + cp.scan_window = cpu_to_le16(hdev->le_scan_window_connect); bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 00458a8c26f8..4f1052a7c488 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2982,7 +2982,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, adv_instance->remaining_time = timeout; if (duration == 0) - adv_instance->duration = HCI_DEFAULT_ADV_DURATION; + adv_instance->duration = hdev->def_multi_adv_rotation_duration; else adv_instance->duration = duration; @@ -3400,6 +3400,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_adv_max_interval = 0x0800; hdev->le_scan_interval = 0x0060; hdev->le_scan_window = 0x0030; + hdev->le_scan_int_suspend = 0x0400; + hdev->le_scan_window_suspend = 0x0012; + hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT; + hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN; + hdev->le_scan_int_connect = 0x0060; + hdev->le_scan_window_connect = 0x0060; hdev->le_conn_min_interval = 0x0018; hdev->le_conn_max_interval = 0x0028; hdev->le_conn_latency = 0x0000; @@ -3415,6 +3421,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES; + hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; @@ -3423,6 +3430,11 @@ struct hci_dev *hci_alloc_dev(void) hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT; hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE; + /* default 1.28 sec page scan */ + hdev->def_page_scan_type = PAGE_SCAN_TYPE_STANDARD; + hdev->def_page_scan_int = 0x0800; + hdev->def_page_scan_window = 0x0012; + mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1acf5b8e0910..a7f572ad38ef 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -34,9 +34,6 @@ #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 -#define LE_SUSPEND_SCAN_WINDOW 0x0012 -#define LE_SUSPEND_SCAN_INTERVAL 0x0400 - void hci_req_init(struct hci_request *req, struct hci_dev *hdev) { skb_queue_head_init(&req->cmd_q); @@ -366,13 +363,11 @@ void __hci_req_write_fast_connectable(struct hci_request *req, bool enable) /* 160 msec page scan interval */ acp.interval = cpu_to_le16(0x0100); } else { - type = PAGE_SCAN_TYPE_STANDARD; /* default */ - - /* default 1.28 sec page scan */ - acp.interval = cpu_to_le16(0x0800); + type = hdev->def_page_scan_type; + acp.interval = cpu_to_le16(hdev->def_page_scan_int); } - acp.window = cpu_to_le16(0x0012); + acp.window = cpu_to_le16(hdev->def_page_scan_window); if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || __cpu_to_le16(hdev->page_scan_window) != acp.window) @@ -927,8 +922,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req) filter_policy |= 0x02; if (hdev->suspended) { - window = LE_SUSPEND_SCAN_WINDOW; - interval = LE_SUSPEND_SCAN_INTERVAL; + window = hdev->le_scan_window_suspend; + interval = hdev->le_scan_int_suspend; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; -- cgit From 17896406ff3592d47b476ddd29276bf9cf8a26dd Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Thu, 11 Jun 2020 02:01:57 +0000 Subject: Bluetooth: implement read/set default system parameters mgmt This patch implements the read default system parameters and the set default system parameters mgmt commands. Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/Makefile | 2 +- net/bluetooth/mgmt.c | 8 ++ net/bluetooth/mgmt_config.c | 253 ++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt_config.h | 11 ++ 4 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 net/bluetooth/mgmt_config.c create mode 100644 net/bluetooth/mgmt_config.h (limited to 'net') diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 41dd541a44a5..1c645fba8c49 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -14,7 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecdh_helper.o hci_request.o mgmt_util.o + ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o bluetooth-$(CONFIG_BT_BREDR) += sco.o bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9e8a3cccc6ca..99fbfd467d04 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -36,6 +36,7 @@ #include "hci_request.h" #include "smp.h" #include "mgmt_util.h" +#include "mgmt_config.h" #define MGMT_VERSION 1 #define MGMT_REVISION 17 @@ -111,6 +112,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_SECURITY_INFO, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_SET_EXP_FEATURE, + MGMT_OP_READ_DEF_SYSTEM_CONFIG, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, }; static const u16 mgmt_events[] = { @@ -162,6 +165,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_EXT_INFO, MGMT_OP_READ_SECURITY_INFO, MGMT_OP_READ_EXP_FEATURES_INFO, + MGMT_OP_READ_DEF_SYSTEM_CONFIG, }; static const u16 mgmt_untrusted_events[] = { @@ -7297,6 +7301,10 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { set_exp_feature, MGMT_SET_EXP_FEATURE_SIZE, HCI_MGMT_VAR_LEN | HCI_MGMT_HDEV_OPTIONAL }, + { read_def_system_config, MGMT_READ_DEF_SYSTEM_CONFIG_SIZE, + HCI_MGMT_UNTRUSTED }, + { set_def_system_config, MGMT_SET_DEF_SYSTEM_CONFIG_SIZE, + HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c new file mode 100644 index 000000000000..f6dfbe93542c --- /dev/null +++ b/net/bluetooth/mgmt_config.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (C) 2020 Google Corporation + */ + +#include +#include +#include + +#include "mgmt_util.h" +#include "mgmt_config.h" + +#define HDEV_PARAM_U16(_param_code_, _param_name_) \ +{ \ + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ + { cpu_to_le16(hdev->_param_name_) } \ +} + +int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + struct { + struct mgmt_tlv entry; + union { + /* This is a simplification for now since all values + * are 16 bits. In the future, this code may need + * refactoring to account for variable length values + * and properly calculate the required buffer size. + */ + __le16 value; + }; + } __packed params[] = { + /* Please see mgmt-api.txt for documentation of these values */ + HDEV_PARAM_U16(0x0000, def_page_scan_type), + HDEV_PARAM_U16(0x0001, def_page_scan_int), + HDEV_PARAM_U16(0x0002, def_page_scan_window), + HDEV_PARAM_U16(0x0003, def_inq_scan_type), + HDEV_PARAM_U16(0x0004, def_inq_scan_int), + HDEV_PARAM_U16(0x0005, def_inq_scan_window), + HDEV_PARAM_U16(0x0006, def_br_lsto), + HDEV_PARAM_U16(0x0007, def_page_timeout), + HDEV_PARAM_U16(0x0008, sniff_min_interval), + HDEV_PARAM_U16(0x0009, sniff_max_interval), + HDEV_PARAM_U16(0x000a, le_adv_min_interval), + HDEV_PARAM_U16(0x000b, le_adv_max_interval), + HDEV_PARAM_U16(0x000c, def_multi_adv_rotation_duration), + HDEV_PARAM_U16(0x000d, le_scan_interval), + HDEV_PARAM_U16(0x000e, le_scan_window), + HDEV_PARAM_U16(0x000f, le_scan_int_suspend), + HDEV_PARAM_U16(0x0010, le_scan_window_suspend), + HDEV_PARAM_U16(0x0011, le_scan_int_discovery), + HDEV_PARAM_U16(0x0012, le_scan_window_discovery), + HDEV_PARAM_U16(0x0013, le_scan_int_adv_monitor), + HDEV_PARAM_U16(0x0014, le_scan_window_adv_monitor), + HDEV_PARAM_U16(0x0015, le_scan_int_connect), + HDEV_PARAM_U16(0x0016, le_scan_window_connect), + HDEV_PARAM_U16(0x0017, le_conn_min_interval), + HDEV_PARAM_U16(0x0018, le_conn_max_interval), + HDEV_PARAM_U16(0x0019, le_conn_latency), + HDEV_PARAM_U16(0x001a, le_supv_timeout), + }; + struct mgmt_rp_read_def_system_config *rp = (void *)params; + + bt_dev_dbg(hdev, "sock %p", sk); + + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_DEF_SYSTEM_CONFIG, + 0, rp, sizeof(params)); +} + +#define TO_TLV(x) ((struct mgmt_tlv *)(x)) +#define TLV_GET_LE16(tlv) le16_to_cpu(*((__le16 *)(TO_TLV(tlv)->value))) + +int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + u16 buffer_left = data_len; + u8 *buffer = data; + + if (buffer_left < sizeof(struct mgmt_tlv)) { + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_INVALID_PARAMS); + } + + /* First pass to validate the tlv */ + while (buffer_left >= sizeof(struct mgmt_tlv)) { + const u8 len = TO_TLV(buffer)->length; + const u16 exp_len = sizeof(struct mgmt_tlv) + + len; + const u16 type = le16_to_cpu(TO_TLV(buffer)->type); + + if (buffer_left < exp_len) { + bt_dev_warn(hdev, "invalid len left %d, exp >= %d", + buffer_left, exp_len); + + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_INVALID_PARAMS); + } + + /* Please see mgmt-api.txt for documentation of these values */ + switch (type) { + case 0x0000: + case 0x0001: + case 0x0002: + case 0x0003: + case 0x0004: + case 0x0005: + case 0x0006: + case 0x0007: + case 0x0008: + case 0x0009: + case 0x000a: + case 0x000b: + case 0x000c: + case 0x000d: + case 0x000e: + case 0x000f: + case 0x0010: + case 0x0011: + case 0x0012: + case 0x0013: + case 0x0014: + case 0x0015: + case 0x0016: + case 0x0017: + case 0x0018: + case 0x0019: + case 0x001a: + if (len != sizeof(u16)) { + bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d", + len, sizeof(u16), type); + + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_INVALID_PARAMS); + } + break; + default: + bt_dev_warn(hdev, "unsupported parameter %u", type); + break; + } + + buffer_left -= exp_len; + buffer += exp_len; + } + + buffer_left = data_len; + buffer = data; + while (buffer_left >= sizeof(struct mgmt_tlv)) { + const u8 len = TO_TLV(buffer)->length; + const u16 exp_len = sizeof(struct mgmt_tlv) + + len; + const u16 type = le16_to_cpu(TO_TLV(buffer)->type); + + switch (type) { + case 0x0000: + hdev->def_page_scan_type = TLV_GET_LE16(buffer); + break; + case 0x0001: + hdev->def_page_scan_int = TLV_GET_LE16(buffer); + break; + case 0x0002: + hdev->def_page_scan_window = TLV_GET_LE16(buffer); + break; + case 0x0003: + hdev->def_inq_scan_type = TLV_GET_LE16(buffer); + break; + case 0x0004: + hdev->def_inq_scan_int = TLV_GET_LE16(buffer); + break; + case 0x0005: + hdev->def_inq_scan_window = TLV_GET_LE16(buffer); + break; + case 0x0006: + hdev->def_br_lsto = TLV_GET_LE16(buffer); + break; + case 0x0007: + hdev->def_page_timeout = TLV_GET_LE16(buffer); + break; + case 0x0008: + hdev->sniff_min_interval = TLV_GET_LE16(buffer); + break; + case 0x0009: + hdev->sniff_max_interval = TLV_GET_LE16(buffer); + break; + case 0x000a: + hdev->le_adv_min_interval = TLV_GET_LE16(buffer); + break; + case 0x000b: + hdev->le_adv_max_interval = TLV_GET_LE16(buffer); + break; + case 0x000c: + hdev->def_multi_adv_rotation_duration = + TLV_GET_LE16(buffer); + break; + case 0x000d: + hdev->le_scan_interval = TLV_GET_LE16(buffer); + break; + case 0x000e: + hdev->le_scan_window = TLV_GET_LE16(buffer); + break; + case 0x000f: + hdev->le_scan_int_suspend = TLV_GET_LE16(buffer); + break; + case 0x0010: + hdev->le_scan_window_suspend = TLV_GET_LE16(buffer); + break; + case 0x0011: + hdev->le_scan_int_discovery = TLV_GET_LE16(buffer); + break; + case 0x00012: + hdev->le_scan_window_discovery = TLV_GET_LE16(buffer); + break; + case 0x00013: + hdev->le_scan_int_adv_monitor = TLV_GET_LE16(buffer); + break; + case 0x00014: + hdev->le_scan_window_adv_monitor = TLV_GET_LE16(buffer); + break; + case 0x00015: + hdev->le_scan_int_connect = TLV_GET_LE16(buffer); + break; + case 0x00016: + hdev->le_scan_window_connect = TLV_GET_LE16(buffer); + break; + case 0x00017: + hdev->le_conn_min_interval = TLV_GET_LE16(buffer); + break; + case 0x00018: + hdev->le_conn_max_interval = TLV_GET_LE16(buffer); + break; + case 0x00019: + hdev->le_conn_latency = TLV_GET_LE16(buffer); + break; + case 0x0001a: + hdev->le_supv_timeout = TLV_GET_LE16(buffer); + break; + default: + bt_dev_warn(hdev, "unsupported parameter %u", type); + break; + } + + buffer_left -= exp_len; + buffer += exp_len; + } + + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_SUCCESS); +} diff --git a/net/bluetooth/mgmt_config.h b/net/bluetooth/mgmt_config.h new file mode 100644 index 000000000000..51da6e63b1a0 --- /dev/null +++ b/net/bluetooth/mgmt_config.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (C) 2020 Google Corporation + */ + +int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len); + +int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len); -- cgit From cad2929dc4321b1f237767e9bd271b61a2eaa752 Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Wed, 17 Jun 2020 13:56:05 +0700 Subject: tipc: update a binding service via broadcast Currently, updating binding table (add service binding to name table/withdraw a service binding) is being sent over replicast. However, if we are scaling up clusters to > 100 nodes/containers this method is less affection because of looping through nodes in a cluster one by one. It is worth to use broadcast to update a binding service. This way, the binding table can be updated on all peer nodes in one shot. Broadcast is used when all peer nodes, as indicated by a new capability flag TIPC_NAMED_BCAST, support reception of this message type. Four problems need to be considered when introducing this feature. 1) When establishing a link to a new peer node we still update this by a unicast 'bulk' update. This may lead to race conditions, where a later broadcast publication/withdrawal bypass the 'bulk', resulting in disordered publications, or even that a withdrawal may arrive before the corresponding publication. We solve this by adding an 'is_last_bulk' bit in the last bulk messages so that it can be distinguished from all other messages. Only when this message has arrived do we open up for reception of broadcast publications/withdrawals. 2) When a first legacy node is added to the cluster all distribution will switch over to use the legacy 'replicast' method, while the opposite happens when the last legacy node leaves the cluster. This entails another risk of message disordering that has to be handled. We solve this by adding a sequence number to the broadcast/replicast messages, so that disordering can be discovered and corrected. Note however that we don't need to consider potential message loss or duplication at this protocol level. 3) Bulk messages don't contain any sequence numbers, and will always arrive in order. Hence we must exempt those from the sequence number control and deliver them unconditionally. We solve this by adding a new 'is_bulk' bit in those messages so that they can be recognized. 4) Legacy messages, which don't contain any new bits or sequence numbers, but neither can arrive out of order, also need to be exempt from the initial synchronization and sequence number check, and delivered unconditionally. Therefore, we add another 'is_not_legacy' bit to all new messages so that those can be distinguished from legacy messages and the latter delivered directly. v1->v2: - fix warning issue reported by kbuild test robot - add santiy check to drop the publication message with a sequence number that is lower than the agreed synch point Signed-off-by: kernel test robot Signed-off-by: Hoang Huu Le Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 6 +-- net/tipc/bcast.h | 4 +- net/tipc/link.c | 2 +- net/tipc/msg.h | 40 +++++++++++++++++ net/tipc/name_distr.c | 116 +++++++++++++++++++++++++++++++++++++------------- net/tipc/name_distr.h | 9 ++-- net/tipc/name_table.c | 9 +++- net/tipc/name_table.h | 2 + net/tipc/node.c | 29 ++++++++++--- net/tipc/node.h | 8 ++-- 10 files changed, 177 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 383f87bc1061..940d176e0e87 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -250,8 +250,8 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, * Consumes the buffer chain. * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE */ -static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, - u16 *cong_link_cnt) +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt) { struct tipc_link *l = tipc_bc_sndlink(net); struct sk_buff_head xmitq; @@ -752,7 +752,7 @@ void tipc_nlist_purge(struct tipc_nlist *nl) nl->local = false; } -u32 tipc_bcast_get_broadcast_mode(struct net *net) +u32 tipc_bcast_get_mode(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 4240c95188b1..2d9352dc7b0e 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -90,6 +90,8 @@ void tipc_bcast_toggle_rcast(struct net *net, bool supp); int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); @@ -101,7 +103,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); -u32 tipc_bcast_get_broadcast_mode(struct net *net); +u32 tipc_bcast_get_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, diff --git a/net/tipc/link.c b/net/tipc/link.c index ee3b8d0576b8..eac89a3e22ce 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2745,7 +2745,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, void *hdr; struct nlattr *attrs; struct nlattr *prop; - u32 bc_mode = tipc_bcast_get_broadcast_mode(net); + u32 bc_mode = tipc_bcast_get_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); if (!bcl) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 58660d56bc83..65119e81ff0c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -438,6 +438,36 @@ static inline void msg_set_errcode(struct tipc_msg *m, u32 err) msg_set_bits(m, 1, 25, 0xf, err); } +static inline void msg_set_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 28, 0x1, 1); +} + +static inline u32 msg_is_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 28, 0x1); +} + +static inline void msg_set_last_bulk(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 27, 0x1, 1); +} + +static inline u32 msg_is_last_bulk(struct tipc_msg *m) +{ + return msg_bits(m, 1, 27, 0x1); +} + +static inline void msg_set_non_legacy(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 26, 0x1, 1); +} + +static inline u32 msg_is_legacy(struct tipc_msg *m) +{ + return !msg_bits(m, 1, 26, 0x1); +} + static inline u32 msg_reroute_cnt(struct tipc_msg *m) { return msg_bits(m, 1, 21, 0xf); @@ -567,6 +597,16 @@ static inline void msg_set_origport(struct tipc_msg *m, u32 p) msg_set_word(m, 4, p); } +static inline u16 msg_named_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + static inline u32 msg_destport(struct tipc_msg *m) { return msg_word(m, 5); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 5feaf3b67380..2f9c148f17e2 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -102,7 +102,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) pr_warn("Publication distribution failure\n"); return NULL; } - + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, publ); return skb; @@ -114,8 +115,8 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) { struct name_table *nt = tipc_name_table(net); - struct sk_buff *buf; struct distr_item *item; + struct sk_buff *skb; write_lock_bh(&nt->cluster_scope_lock); list_del(&publ->binding_node); @@ -123,15 +124,16 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) if (publ->scope == TIPC_NODE_SCOPE) return NULL; - buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); - if (!buf) { + skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); + if (!skb) { pr_warn("Withdrawal distribution failure\n"); return NULL; } - - item = (struct distr_item *)msg_data(buf_msg(buf)); + msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++); + msg_set_non_legacy(buf_msg(skb)); + item = (struct distr_item *)msg_data(buf_msg(skb)); publ_to_item(item, publ); - return buf; + return skb; } /** @@ -141,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) * @pls: linked list of publication items to be packed into buffer chain */ static void named_distribute(struct net *net, struct sk_buff_head *list, - u32 dnode, struct list_head *pls) + u32 dnode, struct list_head *pls, u16 seqno) { struct publication *publ; struct sk_buff *skb = NULL; @@ -149,6 +151,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0, false) - INT_H_SIZE) / ITEM_SIZE) * ITEM_SIZE; u32 msg_rem = msg_dsz; + struct tipc_msg *hdr; list_for_each_entry(publ, pls, binding_node) { /* Prepare next buffer: */ @@ -159,8 +162,11 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, pr_warn("Bulk publication failure\n"); return; } - msg_set_bc_ack_invalid(buf_msg(skb), true); - item = (struct distr_item *)msg_data(buf_msg(skb)); + hdr = buf_msg(skb); + msg_set_bc_ack_invalid(hdr, true); + msg_set_bulk(hdr); + msg_set_non_legacy(hdr); + item = (struct distr_item *)msg_data(hdr); } /* Pack publication into message: */ @@ -176,24 +182,35 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, } } if (skb) { - msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem)); + hdr = buf_msg(skb); + msg_set_size(hdr, INT_H_SIZE + (msg_dsz - msg_rem)); skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); __skb_queue_tail(list, skb); } + hdr = buf_msg(skb_peek_tail(list)); + msg_set_last_bulk(hdr); + msg_set_named_seqno(hdr, seqno); } /** * tipc_named_node_up - tell specified node about all publications by this node */ -void tipc_named_node_up(struct net *net, u32 dnode) +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities) { struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); struct sk_buff_head head; + u16 seqno; __skb_queue_head_init(&head); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests++; + seqno = nt->snd_nxt; + spin_unlock_bh(&tn->nametbl_lock); read_lock_bh(&nt->cluster_scope_lock); - named_distribute(net, &head, dnode, &nt->cluster_scope); + named_distribute(net, &head, dnode, &nt->cluster_scope, seqno); tipc_node_xmit(net, &head, dnode, 0); read_unlock_bh(&nt->cluster_scope_lock); } @@ -245,13 +262,21 @@ static void tipc_dist_queue_purge(struct net *net, u32 addr) spin_unlock_bh(&tn->nametbl_lock); } -void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities) { + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) tipc_publ_purge(net, publ, addr); tipc_dist_queue_purge(net, addr); + spin_lock_bh(&tn->nametbl_lock); + if (!(capabilities & TIPC_NAMED_BCAST)) + nt->rc_dests--; + spin_unlock_bh(&tn->nametbl_lock); } /** @@ -295,29 +320,62 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, return false; } +static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) +{ + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + u16 seqno; + + skb_queue_walk_safe(namedq, skb, tmp) { + skb_linearize(skb); + hdr = buf_msg(skb); + seqno = msg_named_seqno(hdr); + if (msg_is_last_bulk(hdr)) { + *rcv_nxt = seqno; + *open = true; + } + + if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { + __skb_unlink(skb, namedq); + return skb; + } + + if (*open && (*rcv_nxt == seqno)) { + (*rcv_nxt)++; + __skb_unlink(skb, namedq); + return skb; + } + + if (less(seqno, *rcv_nxt)) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } + } + return NULL; +} + /** * tipc_named_rcv - process name table update messages sent by another node */ -void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_msg *msg; + struct tipc_net *tn = tipc_net(net); struct distr_item *item; - uint count; - u32 node; + struct tipc_msg *hdr; struct sk_buff *skb; - int mtype; + u32 count, node; spin_lock_bh(&tn->nametbl_lock); - for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { - skb_linearize(skb); - msg = buf_msg(skb); - mtype = msg_type(msg); - item = (struct distr_item *)msg_data(msg); - count = msg_data_sz(msg) / ITEM_SIZE; - node = msg_orignode(msg); + while ((skb = tipc_named_dequeue(namedq, rcv_nxt, open))) { + hdr = buf_msg(skb); + node = msg_orignode(hdr); + item = (struct distr_item *)msg_data(hdr); + count = msg_data_sz(hdr) / ITEM_SIZE; while (count--) { - tipc_update_nametbl(net, item, node, mtype); + tipc_update_nametbl(net, item, node, msg_type(hdr)); item++; } kfree_skb(skb); @@ -345,6 +403,6 @@ void tipc_named_reinit(struct net *net) publ->node = self; list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) publ->node = self; - + nt->rc_dests = 0; spin_unlock_bh(&tn->nametbl_lock); } diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h index 63fc73e0fa6c..092323158f06 100644 --- a/net/tipc/name_distr.h +++ b/net/tipc/name_distr.h @@ -67,11 +67,14 @@ struct distr_item { __be32 key; }; +void tipc_named_bcast(struct net *net, struct sk_buff *skb); struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); -void tipc_named_node_up(struct net *net, u32 dnode); -void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); +void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities); +void tipc_named_rcv(struct net *net, struct sk_buff_head *namedq, + u16 *rcv_nxt, bool *open); void tipc_named_reinit(struct net *net); -void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, + u32 addr, u16 capabilities); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 359b2bc888cf..2ac33d32edc2 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -729,6 +729,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, struct tipc_net *tn = tipc_net(net); struct publication *p = NULL; struct sk_buff *skb = NULL; + u32 rc_dests; spin_lock_bh(&tn->nametbl_lock); @@ -743,12 +744,14 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, nt->local_publ_count++; skb = tipc_named_publish(net, p); } + rc_dests = nt->rc_dests; exit: spin_unlock_bh(&tn->nametbl_lock); if (skb) - tipc_node_broadcast(net, skb); + tipc_node_broadcast(net, skb, rc_dests); return p; + } /** @@ -762,6 +765,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 self = tipc_own_addr(net); struct sk_buff *skb = NULL; struct publication *p; + u32 rc_dests; spin_lock_bh(&tn->nametbl_lock); @@ -775,10 +779,11 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, pr_err("Failed to remove local publication {%u,%u,%u}/%u\n", type, lower, upper, key); } + rc_dests = nt->rc_dests; spin_unlock_bh(&tn->nametbl_lock); if (skb) { - tipc_node_broadcast(net, skb); + tipc_node_broadcast(net, skb, rc_dests); return 1; } return 0; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 728bc7016c38..8064e1986e2c 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -106,6 +106,8 @@ struct name_table { struct list_head cluster_scope; rwlock_t cluster_scope_lock; u32 local_publ_count; + u32 rc_dests; + u32 snd_nxt; }; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/node.c b/net/tipc/node.c index a4c2816c3746..030a51c4d1fa 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -75,6 +75,8 @@ struct tipc_bclink_entry { struct sk_buff_head arrvq; struct sk_buff_head inputq2; struct sk_buff_head namedq; + u16 named_rcv_nxt; + bool named_open; }; /** @@ -396,10 +398,10 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr); + tipc_publ_notify(net, publ_list, addr, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr); + tipc_named_node_up(net, addr, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { tipc_mon_peer_up(net, addr, bearer_id); @@ -1483,6 +1485,7 @@ static void node_lost_contact(struct tipc_node *n, /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); + __skb_queue_purge(&n->bc_entry.namedq); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { @@ -1729,12 +1732,23 @@ int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) return 0; } -void tipc_node_broadcast(struct net *net, struct sk_buff *skb) +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests) { + struct sk_buff_head xmitq; struct sk_buff *txskb; struct tipc_node *n; + u16 dummy; u32 dst; + /* Use broadcast if all nodes support it */ + if (!rc_dests && tipc_bcast_get_mode(net) != BCLINK_MODE_RCAST) { + __skb_queue_head_init(&xmitq); + __skb_queue_tail(&xmitq, skb); + tipc_bcast_xmit(net, &xmitq, &dummy); + return; + } + + /* Otherwise use legacy replicast method */ rcu_read_lock(); list_for_each_entry_rcu(n, tipc_nodes(net), list) { dst = n->addr; @@ -1749,7 +1763,6 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) tipc_node_xmit_skb(net, txskb, dst, 0); } rcu_read_unlock(); - kfree_skb(skb); } @@ -1844,7 +1857,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id /* Handle NAME_DISTRIBUTOR messages sent from 1.7 nodes */ if (!skb_queue_empty(&n->bc_entry.namedq)) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); /* If reassembly or retransmission failure => reset all links to peer */ if (rc & TIPC_LINK_DOWN_EVT) @@ -2114,7 +2129,9 @@ rcv: tipc_node_link_down(n, bearer_id, false); if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) - tipc_named_rcv(net, &n->bc_entry.namedq); + tipc_named_rcv(net, &n->bc_entry.namedq, + &n->bc_entry.named_rcv_nxt, + &n->bc_entry.named_open); if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) tipc_node_mcast_rcv(n); diff --git a/net/tipc/node.h b/net/tipc/node.h index a6803b449a2c..9f6f13f1604f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -55,7 +55,8 @@ enum { TIPC_MCAST_RBCTL = (1 << 7), TIPC_GAP_ACK_BLOCK = (1 << 8), TIPC_TUNNEL_ENHANCED = (1 << 9), - TIPC_NAGLE = (1 << 10) + TIPC_NAGLE = (1 << 10), + TIPC_NAMED_BCAST = (1 << 11) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -68,7 +69,8 @@ enum { TIPC_MCAST_RBCTL | \ TIPC_GAP_ACK_BLOCK | \ TIPC_TUNNEL_ENHANCED | \ - TIPC_NAGLE) + TIPC_NAGLE | \ + TIPC_NAMED_BCAST) #define INVALID_BEARER_ID -1 @@ -101,7 +103,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); -void tipc_node_broadcast(struct net *net, struct sk_buff *skb); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel, bool connected); -- cgit From 3dd1499666f64817173c6390ed4d387471d07df8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 Jun 2020 18:01:07 -0500 Subject: ethtool: ioctl: Use array_size() in copy_to_user() Use array_size() helper instead of the open-coded version in copy_to_user(). These sorts of multiplication factors need to be wrapped in array_size(). This issue was found with the help of Coccinelle and, audited and fixed manually. Addresses-KSPP-ID: https://github.com/KSPP/linux/issues/83 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b5df90c981c2..be3ed24bfe03 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1918,7 +1918,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) + if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) goto out; ret = 0; @@ -1973,7 +1973,7 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (copy_to_user(useraddr, &stats, sizeof(stats))) goto out; useraddr += sizeof(stats); - if (n_stats && copy_to_user(useraddr, data, n_stats * sizeof(u64))) + if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64)))) goto out; ret = 0; -- cgit From aececa645dc79ec004bfed3357c15cbf4b9b5746 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 17 Jun 2020 16:39:07 +0200 Subject: Bluetooth: mgmt: Add commands for runtime configuration This adds the required read/set commands for runtime configuration. Even while currently no parameters are specified, the commands are made available. Signed-off-by: Marcel Holtmann Reviewed-by: Alain Michaud Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 7 +++++++ net/bluetooth/mgmt_config.c | 18 ++++++++++++++++++ net/bluetooth/mgmt_config.h | 6 ++++++ 3 files changed, 31 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 99fbfd467d04..ecfdfc4df486 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -114,6 +114,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_EXP_FEATURE, MGMT_OP_READ_DEF_SYSTEM_CONFIG, MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_OP_READ_DEF_RUNTIME_CONFIG, + MGMT_OP_SET_DEF_RUNTIME_CONFIG, }; static const u16 mgmt_events[] = { @@ -166,6 +168,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_SECURITY_INFO, MGMT_OP_READ_EXP_FEATURES_INFO, MGMT_OP_READ_DEF_SYSTEM_CONFIG, + MGMT_OP_READ_DEF_RUNTIME_CONFIG, }; static const u16 mgmt_untrusted_events[] = { @@ -7305,6 +7308,10 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_UNTRUSTED }, { set_def_system_config, MGMT_SET_DEF_SYSTEM_CONFIG_SIZE, HCI_MGMT_VAR_LEN }, + { read_def_runtime_config, MGMT_READ_DEF_RUNTIME_CONFIG_SIZE, + HCI_MGMT_UNTRUSTED }, + { set_def_runtime_config, MGMT_SET_DEF_RUNTIME_CONFIG_SIZE, + HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c index f6dfbe93542c..8e7ad2a51dbb 100644 --- a/net/bluetooth/mgmt_config.c +++ b/net/bluetooth/mgmt_config.c @@ -251,3 +251,21 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, MGMT_OP_SET_DEF_SYSTEM_CONFIG, MGMT_STATUS_SUCCESS); } + +int read_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + bt_dev_dbg(hdev, "sock %p", sk); + + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_DEF_RUNTIME_CONFIG, 0, NULL, 0); +} + +int set_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + bt_dev_dbg(hdev, "sock %p", sk); + + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEF_SYSTEM_CONFIG, + MGMT_STATUS_INVALID_PARAMS); +} diff --git a/net/bluetooth/mgmt_config.h b/net/bluetooth/mgmt_config.h index 51da6e63b1a0..a4965f107891 100644 --- a/net/bluetooth/mgmt_config.h +++ b/net/bluetooth/mgmt_config.h @@ -9,3 +9,9 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len); + +int read_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len); + +int set_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len); -- cgit From 8baaa4038edbff67f318574e233e9e7e43808230 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:08 +0200 Subject: Bluetooth: Add bdaddr_list_with_flags for classic whitelist In order to more easily add device flags to classic devices, create a new type of bdaddr_list that supports setting flags. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_event.c | 8 +++---- net/bluetooth/mgmt.c | 5 ++-- 3 files changed, 65 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f1052a7c488..8a471bec2731 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3023,6 +3023,20 @@ struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk( return NULL; } +struct bdaddr_list_with_flags * +hci_bdaddr_list_lookup_with_flags(struct list_head *bdaddr_list, + bdaddr_t *bdaddr, u8 type) +{ + struct bdaddr_list_with_flags *b; + + list_for_each_entry(b, bdaddr_list, list) { + if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type) + return b; + } + + return NULL; +} + void hci_bdaddr_list_clear(struct list_head *bdaddr_list) { struct bdaddr_list *b, *n; @@ -3084,6 +3098,30 @@ int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr, return 0; } +int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr, + u8 type, u32 flags) +{ + struct bdaddr_list_with_flags *entry; + + if (!bacmp(bdaddr, BDADDR_ANY)) + return -EBADF; + + if (hci_bdaddr_list_lookup(list, bdaddr, type)) + return -EEXIST; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + bacpy(&entry->bdaddr, bdaddr); + entry->bdaddr_type = type; + entry->current_flags = flags; + + list_add(&entry->list, list); + + return 0; +} + int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type) { struct bdaddr_list *entry; @@ -3123,6 +3161,26 @@ int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr, return 0; } +int hci_bdaddr_list_del_with_flags(struct list_head *list, bdaddr_t *bdaddr, + u8 type) +{ + struct bdaddr_list_with_flags *entry; + + if (!bacmp(bdaddr, BDADDR_ANY)) { + hci_bdaddr_list_clear(list); + return 0; + } + + entry = hci_bdaddr_list_lookup_with_flags(list, bdaddr, type); + if (!entry) + return -ENOENT; + + list_del(&entry->list); + kfree(entry); + + return 0; +} + /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cfeaee347db3..8981954ff4c4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2697,10 +2697,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) */ if (hci_dev_test_flag(hdev, HCI_MGMT) && !hci_dev_test_flag(hdev, HCI_CONNECTABLE) && - !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr, - BDADDR_BREDR)) { - hci_reject_conn(hdev, &ev->bdaddr); - return; + !hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &ev->bdaddr, + BDADDR_BREDR)) { + hci_reject_conn(hdev, &ev->bdaddr); + return; } /* Connection accepted */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ecfdfc4df486..d0d0fa832c8a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6000,8 +6000,9 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - err = hci_bdaddr_list_add(&hdev->whitelist, &cp->addr.bdaddr, - cp->addr.type); + err = hci_bdaddr_list_add_with_flags(&hdev->whitelist, + &cp->addr.bdaddr, + cp->addr.type, 0); if (err) goto unlock; -- cgit From 7a92906f841db46a91df0179459ad8b2052f2e54 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:09 +0200 Subject: Bluetooth: Replace wakeable list with flag Since the classic device list now supports flags, convert the wakeable list into a flag on the existing device list. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 1 - net/bluetooth/hci_request.c | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a471bec2731..8e01afb2ee8c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3499,7 +3499,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->blacklist); INIT_LIST_HEAD(&hdev->whitelist); - INIT_LIST_HEAD(&hdev->wakeable); INIT_LIST_HEAD(&hdev->uuids); INIT_LIST_HEAD(&hdev->link_keys); INIT_LIST_HEAD(&hdev->long_term_keys); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index a7f572ad38ef..a5b53d3ea508 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -968,15 +968,19 @@ static void hci_req_clear_event_filter(struct hci_request *req) static void hci_req_set_event_filter(struct hci_request *req) { - struct bdaddr_list *b; + struct bdaddr_list_with_flags *b; struct hci_cp_set_event_filter f; struct hci_dev *hdev = req->hdev; - u8 scan; + u8 scan = SCAN_DISABLED; /* Always clear event filter when starting */ hci_req_clear_event_filter(req); - list_for_each_entry(b, &hdev->wakeable, list) { + list_for_each_entry(b, &hdev->whitelist, list) { + if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP, + b->current_flags)) + continue; + memset(&f, 0, sizeof(f)); bacpy(&f.addr_conn_flt.bdaddr, &b->bdaddr); f.flt_type = HCI_FLT_CONN_SETUP; @@ -985,9 +989,9 @@ static void hci_req_set_event_filter(struct hci_request *req) bt_dev_dbg(hdev, "Adding event filters for %pMR", &b->bdaddr); hci_req_add(req, HCI_OP_SET_EVENT_FLT, sizeof(f), &f); + scan = SCAN_PAGE; } - scan = !list_empty(&hdev->wakeable) ? SCAN_PAGE : SCAN_DISABLED; hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -- cgit From a1fc7535ec34a5904abe93dd42a6ed7e31c36717 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:10 +0200 Subject: Bluetooth: Replace wakeable in hci_conn_params Replace the wakeable boolean with flags in hci_conn_params and all users of this boolean. This will be used by the get/set device flags mgmt op. Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_request.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index a5b53d3ea508..eee9c007a5fb 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -710,7 +710,8 @@ static int add_to_white_list(struct hci_request *req, } /* During suspend, only wakeable devices can be in whitelist */ - if (hdev->suspended && !params->wakeable) + if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP, + params->current_flags)) return 0; *num_entries += 1; -- cgit From 4c54bf2b093bb2ae95e756342646d868e8101cb4 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 17 Jun 2020 16:39:11 +0200 Subject: Bluetooth: Add get/set device flags mgmt op Add the get device flags and set device flags mgmt ops and the device flags changed event. Their behavior is described in detail in mgmt-api.txt in bluez. Sample btmon trace when a HID device is added (trimmed to 75 chars): @ MGMT Command: Unknown (0x0050) plen 11 {0x0001} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 ........... @ MGMT Event: Unknown (0x002a) plen 15 {0x0004} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0003} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0002} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Command Compl.. (0x0001) plen 10 {0x0001} [hci0] 18:06:14.98 Unknown (0x0050) plen 7 Status: Success (0x00) 90 c5 13 cd f3 cd 02 ....... @ MGMT Command: Add Device (0x0033) plen 8 {0x0001} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0004} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0003} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Device Added (0x001a) plen 8 {0x0002} [hci0] 18:06:14.98 LE Address: CD:F3:CD:13:C5:90 (Static) Action: Auto-connect remote device (0x02) @ MGMT Event: Unknown (0x002a) plen 15 {0x0004} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0003} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0002} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... @ MGMT Event: Unknown (0x002a) plen 15 {0x0001} [hci0] 18:06:14.98 90 c5 13 cd f3 cd 02 01 00 00 00 01 00 00 00 ............... Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d0d0fa832c8a..e409ff48e8e6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -116,6 +116,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEF_SYSTEM_CONFIG, MGMT_OP_READ_DEF_RUNTIME_CONFIG, MGMT_OP_SET_DEF_RUNTIME_CONFIG, + MGMT_OP_GET_DEVICE_FLAGS, + MGMT_OP_SET_DEVICE_FLAGS, }; static const u16 mgmt_events[] = { @@ -156,6 +158,7 @@ static const u16 mgmt_events[] = { MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_PHY_CONFIGURATION_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, + MGMT_EV_DEVICE_FLAGS_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -3856,6 +3859,120 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_NOT_SUPPORTED); } +#define SUPPORTED_DEVICE_FLAGS() ((1U << HCI_CONN_FLAG_MAX) - 1) + +static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) +{ + struct mgmt_cp_get_device_flags *cp = data; + struct mgmt_rp_get_device_flags rp; + struct bdaddr_list_with_flags *br_params; + struct hci_conn_params *params; + u32 supported_flags = SUPPORTED_DEVICE_FLAGS(); + u32 current_flags = 0; + u8 status = MGMT_STATUS_INVALID_PARAMS; + + bt_dev_dbg(hdev, "Get device flags %pMR (type 0x%x)\n", + &cp->addr.bdaddr, cp->addr.type); + + if (cp->addr.type == BDADDR_BREDR) { + br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, + &cp->addr.bdaddr, + cp->addr.type); + if (!br_params) + goto done; + + current_flags = br_params->current_flags; + } else { + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + + if (!params) + goto done; + + current_flags = params->current_flags; + } + + bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr); + rp.addr.type = cp->addr.type; + rp.supported_flags = cpu_to_le32(supported_flags); + rp.current_flags = cpu_to_le32(current_flags); + + status = MGMT_STATUS_SUCCESS; + +done: + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_DEVICE_FLAGS, status, + &rp, sizeof(rp)); +} + +static void device_flags_changed(struct sock *sk, struct hci_dev *hdev, + bdaddr_t *bdaddr, u8 bdaddr_type, + u32 supported_flags, u32 current_flags) +{ + struct mgmt_ev_device_flags_changed ev; + + bacpy(&ev.addr.bdaddr, bdaddr); + ev.addr.type = bdaddr_type; + ev.supported_flags = cpu_to_le32(supported_flags); + ev.current_flags = cpu_to_le32(current_flags); + + mgmt_event(MGMT_EV_DEVICE_FLAGS_CHANGED, hdev, &ev, sizeof(ev), sk); +} + +static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_device_flags *cp = data; + struct bdaddr_list_with_flags *br_params; + struct hci_conn_params *params; + u8 status = MGMT_STATUS_INVALID_PARAMS; + u32 supported_flags = SUPPORTED_DEVICE_FLAGS(); + u32 current_flags = __le32_to_cpu(cp->current_flags); + + bt_dev_dbg(hdev, "Set device flags %pMR (type 0x%x) = 0x%x", + &cp->addr.bdaddr, cp->addr.type, + __le32_to_cpu(current_flags)); + + if ((supported_flags | current_flags) != supported_flags) { + bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", + current_flags, supported_flags); + goto done; + } + + if (cp->addr.type == BDADDR_BREDR) { + br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, + &cp->addr.bdaddr, + cp->addr.type); + + if (br_params) { + br_params->current_flags = current_flags; + status = MGMT_STATUS_SUCCESS; + } else { + bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", + &cp->addr.bdaddr, cp->addr.type); + } + } else { + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + if (params) { + params->current_flags = current_flags; + status = MGMT_STATUS_SUCCESS; + } else { + bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", + &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + } + } + +done: + if (status == MGMT_STATUS_SUCCESS) + device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type, + supported_flags, current_flags); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_FLAGS, status, + &cp->addr, sizeof(cp->addr)); +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5973,7 +6090,9 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_add_device *cp = data; u8 auto_conn, addr_type; + struct hci_conn_params *params; int err; + u32 current_flags = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -6041,12 +6160,19 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_FAILED, &cp->addr, sizeof(cp->addr)); goto unlock; + } else { + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + addr_type); + if (params) + current_flags = params->current_flags; } hci_update_background_scan(hdev); added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); + device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type, + SUPPORTED_DEVICE_FLAGS(), current_flags); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, MGMT_STATUS_SUCCESS, &cp->addr, @@ -7313,6 +7439,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_UNTRUSTED }, { set_def_runtime_config, MGMT_SET_DEF_RUNTIME_CONFIG_SIZE, HCI_MGMT_VAR_LEN }, + { get_device_flags, MGMT_GET_DEVICE_FLAGS_SIZE }, + { set_device_flags, MGMT_SET_DEVICE_FLAGS_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit From e5e1e7fd470ccf2eb38ab7fb5a3ab0fc4792fe53 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:13 +0200 Subject: Bluetooth: Add handler of MGMT_OP_READ_ADV_MONITOR_FEATURES This adds the request handler of MGMT_OP_READ_ADV_MONITOR_FEATURES command. Since the controller-based monitoring is not yet in place, this report only the supported features but not the enabled features. The following test was performed. - Issuing btmgmt advmon-features. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 10 +++++++++- net/bluetooth/mgmt.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/msft.c | 7 +++++++ net/bluetooth/msft.h | 9 +++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8e01afb2ee8c..53aec32a5850 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -26,7 +26,6 @@ /* Bluetooth HCI core. */ #include -#include #include #include #include @@ -2996,6 +2995,12 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, return 0; } +/* This function requires the caller holds hdev->lock */ +void hci_adv_monitors_clear(struct hci_dev *hdev) +{ + idr_destroy(&hdev->adv_monitors_idr); +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { @@ -3646,6 +3651,8 @@ int hci_register_dev(struct hci_dev *hdev) queue_work(hdev->req_workqueue, &hdev->power_on); + idr_init(&hdev->adv_monitors_idr); + return id; err_wqueue: @@ -3716,6 +3723,7 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_smp_irks_clear(hdev); hci_remote_oob_data_clear(hdev); hci_adv_instances_clear(hdev); + hci_adv_monitors_clear(hdev); hci_bdaddr_list_clear(&hdev->le_white_list); hci_bdaddr_list_clear(&hdev->le_resolv_list); hci_conn_params_clear_all(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e409ff48e8e6..8aec7fbe9a38 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -37,6 +37,7 @@ #include "smp.h" #include "mgmt_util.h" #include "mgmt_config.h" +#include "msft.h" #define MGMT_VERSION 1 #define MGMT_REVISION 17 @@ -118,6 +119,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEF_RUNTIME_CONFIG, MGMT_OP_GET_DEVICE_FLAGS, MGMT_OP_SET_DEVICE_FLAGS, + MGMT_OP_READ_ADV_MONITOR_FEATURES, }; static const u16 mgmt_events[] = { @@ -3973,6 +3975,51 @@ done: &cp->addr, sizeof(cp->addr)); } +static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct adv_monitor *monitor = NULL; + struct mgmt_rp_read_adv_monitor_features *rp = NULL; + int handle; + size_t rp_size = 0; + __u32 supported = 0; + __u16 num_handles = 0; + __u16 handles[HCI_MAX_ADV_MONITOR_NUM_HANDLES]; + + BT_DBG("request for %s", hdev->name); + + hci_dev_lock(hdev); + + if (msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR) + supported |= MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS; + + idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) { + handles[num_handles++] = monitor->handle; + } + + hci_dev_unlock(hdev); + + rp_size = sizeof(*rp) + (num_handles * sizeof(u16)); + rp = kmalloc(rp_size, GFP_KERNEL); + if (!rp) + return -ENOMEM; + + /* Once controller-based monitoring is in place, the enabled_features + * should reflect the use. + */ + rp->supported_features = cpu_to_le32(supported); + rp->enabled_features = 0; + rp->max_num_handles = cpu_to_le16(HCI_MAX_ADV_MONITOR_NUM_HANDLES); + rp->max_num_patterns = HCI_MAX_ADV_MONITOR_NUM_PATTERNS; + rp->num_handles = cpu_to_le16(num_handles); + if (num_handles) + memcpy(&rp->handles, &handles, (num_handles * sizeof(u16))); + + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_READ_ADV_MONITOR_FEATURES, + MGMT_STATUS_SUCCESS, rp, rp_size); +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -7441,6 +7488,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_VAR_LEN }, { get_device_flags, MGMT_GET_DEVICE_FLAGS_SIZE }, { set_device_flags, MGMT_SET_DEVICE_FLAGS_SIZE }, + { read_adv_mon_features, MGMT_READ_ADV_MONITOR_FEATURES_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index d6c4e6b5ae77..8579bfeb2836 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -139,3 +139,10 @@ void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) bt_dev_dbg(hdev, "MSFT vendor event %u", event); } + +__u64 msft_get_features(struct hci_dev *hdev) +{ + struct msft_data *msft = hdev->msft_data; + + return msft ? msft->features : 0; +} diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index 5aa9130e1f8a..e9c478e890b8 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -3,16 +3,25 @@ * Copyright (C) 2020 Google Corporation */ +#define MSFT_FEATURE_MASK_BREDR_RSSI_MONITOR BIT(0) +#define MSFT_FEATURE_MASK_LE_CONN_RSSI_MONITOR BIT(1) +#define MSFT_FEATURE_MASK_LE_ADV_RSSI_MONITOR BIT(2) +#define MSFT_FEATURE_MASK_LE_ADV_MONITOR BIT(3) +#define MSFT_FEATURE_MASK_CURVE_VALIDITY BIT(4) +#define MSFT_FEATURE_MASK_CONCURRENT_ADV_MONITOR BIT(5) + #if IS_ENABLED(CONFIG_BT_MSFTEXT) void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb); +__u64 msft_get_features(struct hci_dev *hdev); #else static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {} +static inline __u64 msft_get_features(struct hci_dev *hdev) { return 0; } #endif -- cgit From b139553db5cd940d66095fb97de1727e9a19369f Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:14 +0200 Subject: Bluetooth: Add handler of MGMT_OP_ADD_ADV_PATTERNS_MONITOR This adds the request handler of MGMT_OP_ADD_ADV_PATTERNS_MONITOR command. Note that the controller-based monitoring is not yet in place. This tracks the content of the monitor without sending HCI traffic, so the request returns immediately. The following manual test was performed. - Issue btmgmt advmon-add with valid and invalid inputs. - Issue btmgmt advmon-add more the allowed number of monitors. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 40 +++++++++++++++++++ net/bluetooth/mgmt.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 53aec32a5850..ce481fab349d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2998,9 +2998,49 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, /* This function requires the caller holds hdev->lock */ void hci_adv_monitors_clear(struct hci_dev *hdev) { + struct adv_monitor *monitor; + int handle; + + idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) + hci_free_adv_monitor(monitor); + idr_destroy(&hdev->adv_monitors_idr); } +void hci_free_adv_monitor(struct adv_monitor *monitor) +{ + struct adv_pattern *pattern; + struct adv_pattern *tmp; + + if (!monitor) + return; + + list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) + kfree(pattern); + + kfree(monitor); +} + +/* This function requires the caller holds hdev->lock */ +int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) +{ + int min, max, handle; + + if (!monitor) + return -EINVAL; + + min = HCI_MIN_ADV_MONITOR_HANDLE; + max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES; + handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max, + GFP_KERNEL); + if (handle < 0) + return handle; + + hdev->adv_monitors_cnt++; + monitor->handle = handle; + return 0; +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8aec7fbe9a38..1eca36e51706 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -120,6 +120,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_GET_DEVICE_FLAGS, MGMT_OP_SET_DEVICE_FLAGS, MGMT_OP_READ_ADV_MONITOR_FEATURES, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, }; static const u16 mgmt_events[] = { @@ -4020,6 +4021,103 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_SUCCESS, rp, rp_size); } +static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_add_adv_patterns_monitor *cp = data; + struct mgmt_rp_add_adv_patterns_monitor rp; + struct adv_monitor *m = NULL; + struct adv_pattern *p = NULL; + __u8 cp_ofst = 0, cp_len = 0; + unsigned int mp_cnt = 0; + int err, i; + + BT_DBG("request for %s", hdev->name); + + if (len <= sizeof(*cp) || cp->pattern_count == 0) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_INVALID_PARAMS); + goto failed; + } + + m = kmalloc(sizeof(*m), GFP_KERNEL); + if (!m) { + err = -ENOMEM; + goto failed; + } + + INIT_LIST_HEAD(&m->patterns); + m->active = false; + + for (i = 0; i < cp->pattern_count; i++) { + if (++mp_cnt > HCI_MAX_ADV_MONITOR_NUM_PATTERNS) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_INVALID_PARAMS); + goto failed; + } + + cp_ofst = cp->patterns[i].offset; + cp_len = cp->patterns[i].length; + if (cp_ofst >= HCI_MAX_AD_LENGTH || + cp_len > HCI_MAX_AD_LENGTH || + (cp_ofst + cp_len) > HCI_MAX_AD_LENGTH) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_INVALID_PARAMS); + goto failed; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto failed; + } + + p->ad_type = cp->patterns[i].ad_type; + p->offset = cp->patterns[i].offset; + p->length = cp->patterns[i].length; + memcpy(p->value, cp->patterns[i].value, p->length); + + INIT_LIST_HEAD(&p->list); + list_add(&p->list, &m->patterns); + } + + if (mp_cnt != cp->pattern_count) { + err = mgmt_cmd_status(sk, hdev->id, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_INVALID_PARAMS); + goto failed; + } + + hci_dev_lock(hdev); + + err = hci_add_adv_monitor(hdev, m); + if (err) { + if (err == -ENOSPC) { + mgmt_cmd_status(sk, hdev->id, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_NO_RESOURCES); + } + goto unlock; + } + + hci_dev_unlock(hdev); + + rp.monitor_handle = cpu_to_le16(m->handle); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + +unlock: + hci_dev_unlock(hdev); + +failed: + hci_free_adv_monitor(m); + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -7489,6 +7587,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { get_device_flags, MGMT_GET_DEVICE_FLAGS_SIZE }, { set_device_flags, MGMT_SET_DEVICE_FLAGS_SIZE }, { read_adv_mon_features, MGMT_READ_ADV_MONITOR_FEATURES_SIZE }, + { add_adv_patterns_monitor,MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE, + HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit From bd2fbc6cb815b5171facb42526f6db206d920e13 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:15 +0200 Subject: Bluetooth: Add handler of MGMT_OP_REMOVE_ADV_MONITOR This adds the request handler of MGMT_OP_REMOVE_ADV_MONITOR command. Note that the controller-based monitoring is not yet in place. This removes the internal monitor(s) without sending HCI traffic, so the request returns immediately. The following test was performed. - Issue btmgmt advmon-remove with valid and invalid handles. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 31 +++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ce481fab349d..59132b3e2cde 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3041,6 +3041,37 @@ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) return 0; } +static int free_adv_monitor(int id, void *ptr, void *data) +{ + struct hci_dev *hdev = data; + struct adv_monitor *monitor = ptr; + + idr_remove(&hdev->adv_monitors_idr, monitor->handle); + hci_free_adv_monitor(monitor); + + return 0; +} + +/* This function requires the caller holds hdev->lock */ +int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle) +{ + struct adv_monitor *monitor; + + if (handle) { + monitor = idr_find(&hdev->adv_monitors_idr, handle); + if (!monitor) + return -ENOENT; + + idr_remove(&hdev->adv_monitors_idr, monitor->handle); + hci_free_adv_monitor(monitor); + } else { + /* Remove all monitors if handle is 0. */ + idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev); + } + + return 0; +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1eca36e51706..cff24fde72d2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -121,6 +121,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_SET_DEVICE_FLAGS, MGMT_OP_READ_ADV_MONITOR_FEATURES, MGMT_OP_ADD_ADV_PATTERNS_MONITOR, + MGMT_OP_REMOVE_ADV_MONITOR, }; static const u16 mgmt_events[] = { @@ -4118,6 +4119,39 @@ failed: return err; } +static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_remove_adv_monitor *cp = data; + struct mgmt_rp_remove_adv_monitor rp; + u16 handle; + int err; + + BT_DBG("request for %s", hdev->name); + + hci_dev_lock(hdev); + + handle = __le16_to_cpu(cp->monitor_handle); + + err = hci_remove_adv_monitor(hdev, handle); + if (err == -ENOENT) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, + MGMT_STATUS_INVALID_INDEX); + goto unlock; + } + + hci_dev_unlock(hdev); + + rp.monitor_handle = cp->monitor_handle; + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + +unlock: + hci_dev_unlock(hdev); + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -7589,6 +7623,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { read_adv_mon_features, MGMT_READ_ADV_MONITOR_FEATURES_SIZE }, { add_adv_patterns_monitor,MGMT_ADD_ADV_PATTERNS_MONITOR_SIZE, HCI_MGMT_VAR_LEN }, + { remove_adv_monitor, MGMT_REMOVE_ADV_MONITOR_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit From b52729f27b1e3957bef7306d47abf9cd855524e7 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:16 +0200 Subject: Bluetooth: Notify adv monitor added event This notifies management sockets on MGMT_EV_ADV_MONITOR_ADDED event. The following test was performed. - Start two btmgmt consoles, issue a btmgmt advmon-add command on one console and observe a MGMT_EV_ADV_MONITOR_ADDED event on the other Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index cff24fde72d2..3268d9a00608 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -191,6 +191,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_EXT_INDEX_REMOVED, MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, + MGMT_EV_ADV_MONITOR_ADDED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -3977,6 +3978,16 @@ done: &cp->addr, sizeof(cp->addr)); } +static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, + u16 handle) +{ + struct mgmt_ev_adv_monitor_added ev; + + ev.monitor_handle = cpu_to_le16(handle); + + mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); +} + static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -4029,8 +4040,8 @@ static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, struct mgmt_rp_add_adv_patterns_monitor rp; struct adv_monitor *m = NULL; struct adv_pattern *p = NULL; + unsigned int mp_cnt = 0, prev_adv_monitors_cnt; __u8 cp_ofst = 0, cp_len = 0; - unsigned int mp_cnt = 0; int err, i; BT_DBG("request for %s", hdev->name); @@ -4094,6 +4105,8 @@ static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); + prev_adv_monitors_cnt = hdev->adv_monitors_cnt; + err = hci_add_adv_monitor(hdev, m); if (err) { if (err == -ENOSPC) { @@ -4104,6 +4117,9 @@ static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } + if (hdev->adv_monitors_cnt > prev_adv_monitors_cnt) + mgmt_adv_monitor_added(sk, hdev, m->handle); + hci_dev_unlock(hdev); rp.monitor_handle = cpu_to_le16(m->handle); -- cgit From cdde92e230719f77ac3a5f936e25ed4e01ec057f Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:17 +0200 Subject: Bluetooth: Notify adv monitor removed event This notifies management sockets on MGMT_EV_ADV_MONITOR_REMOVED event. The following test was performed. - Start two btmgmt consoles, issue a btmgmt advmon-remove command on one console and observe a MGMT_EV_ADV_MONITOR_REMOVED event on the other. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3268d9a00608..b194da4de2d7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -192,6 +192,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_EXT_INFO_CHANGED, MGMT_EV_EXP_FEATURE_CHANGED, MGMT_EV_ADV_MONITOR_ADDED, + MGMT_EV_ADV_MONITOR_REMOVED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -3988,6 +3989,16 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + u16 handle) +{ + struct mgmt_ev_adv_monitor_added ev; + + ev.monitor_handle = cpu_to_le16(handle); + + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); +} + static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -4140,6 +4151,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_remove_adv_monitor *cp = data; struct mgmt_rp_remove_adv_monitor rp; + unsigned int prev_adv_monitors_cnt; u16 handle; int err; @@ -4148,6 +4160,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); handle = __le16_to_cpu(cp->monitor_handle); + prev_adv_monitors_cnt = hdev->adv_monitors_cnt; err = hci_remove_adv_monitor(hdev, handle); if (err == -ENOENT) { @@ -4156,6 +4169,9 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, goto unlock; } + if (hdev->adv_monitors_cnt < prev_adv_monitors_cnt) + mgmt_adv_monitor_removed(sk, hdev, handle); + hci_dev_unlock(hdev); rp.monitor_handle = cp->monitor_handle; -- cgit From 8208f5a9d435e58ee7f53a24d9ccbe7787944537 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Wed, 17 Jun 2020 16:39:18 +0200 Subject: Bluetooth: Update background scan and report device based on advertisement monitors This calls hci_update_background_scan() when there is any update on the advertisement monitors. If there is at least one advertisement monitor, the filtering policy of scan parameters should be 0x00. This also reports device found mgmt events if there is at least one monitor. The following cases were tested with btmgmt advmon-* commands. (1) add a ADV monitor and observe that the passive scanning is triggered. (2) remove the last ADV monitor and observe that the passive scanning is terminated. (3) with a LE peripheral paired, repeat (1) and observe the passive scanning continues. (4) with a LE peripheral paired, repeat (2) and observe the passive scanning continues. (5) with a ADV monitor, suspend/resume the host and observe the passive scanning continues. Signed-off-by: Miao-chen Chou Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_core.c | 13 +++++++++++++ net/bluetooth/hci_event.c | 5 +++-- net/bluetooth/hci_request.c | 17 ++++++++++++++--- net/bluetooth/mgmt.c | 5 ++++- 4 files changed, 34 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 59132b3e2cde..7959b851cc63 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3005,6 +3005,8 @@ void hci_adv_monitors_clear(struct hci_dev *hdev) hci_free_adv_monitor(monitor); idr_destroy(&hdev->adv_monitors_idr); + + hci_update_background_scan(hdev); } void hci_free_adv_monitor(struct adv_monitor *monitor) @@ -3038,6 +3040,9 @@ int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) hdev->adv_monitors_cnt++; monitor->handle = handle; + + hci_update_background_scan(hdev); + return 0; } @@ -3069,9 +3074,17 @@ int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle) idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev); } + hci_update_background_scan(hdev); + return 0; } +/* This function requires the caller holds hdev->lock */ +bool hci_is_adv_monitoring(struct hci_dev *hdev) +{ + return !idr_is_empty(&hdev->adv_monitors_idr); +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8981954ff4c4..e08d4dd9a24e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5447,14 +5447,15 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* Passive scanning shouldn't trigger any device found events, * except for devices marked as CONN_REPORT for which we do send - * device found events. + * device found events, or advertisement monitoring requested. */ if (hdev->le_scan_type == LE_SCAN_PASSIVE) { if (type == LE_ADV_DIRECT_IND) return; if (!hci_pend_le_action_lookup(&hdev->pend_le_reports, - bdaddr, bdaddr_type)) + bdaddr, bdaddr_type) && + idr_is_empty(&hdev->adv_monitors_idr)) return; if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index eee9c007a5fb..29decd7e8051 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -413,11 +413,15 @@ static void __hci_update_background_scan(struct hci_request *req) */ hci_discovery_filter_clear(hdev); + BT_DBG("%s ADV monitoring is %s", hdev->name, + hci_is_adv_monitoring(hdev) ? "on" : "off"); + if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { + list_empty(&hdev->pend_le_reports) && + !hci_is_adv_monitoring(hdev)) { /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. + * to be scanned for or no ADV monitors, we should stop the + * background scanning. */ /* If controller is not scanning we are done. */ @@ -794,6 +798,13 @@ static u8 update_white_list(struct hci_request *req) return 0x00; } + /* Once the controller offloading of advertisement monitor is in place, + * the if condition should include the support of MSFT extension + * support. + */ + if (!idr_is_empty(&hdev->adv_monitors_idr)) + return 0x00; + /* Select filter policy to use white list */ return 0x01; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b194da4de2d7..ec66160a673c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -8575,8 +8575,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, if (!hci_discovery_active(hdev)) { if (link_type == ACL_LINK) return; - if (link_type == LE_LINK && list_empty(&hdev->pend_le_reports)) + if (link_type == LE_LINK && + list_empty(&hdev->pend_le_reports) && + !hci_is_adv_monitoring(hdev)) { return; + } } if (hdev->discovery.result_filtering) { -- cgit From 76b139965575e51224d33ea721d9d00a542b6b39 Mon Sep 17 00:00:00 2001 From: Manish Mandlik Date: Wed, 17 Jun 2020 16:39:19 +0200 Subject: Bluetooth: Terminate the link if pairing is cancelled If user decides to cancel the ongoing pairing process (e.g. by clicking the cancel button on pairing/passkey window), abort any ongoing pairing and then terminate the link if it was created because of the pair device action. Signed-off-by: Manish Mandlik Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_conn.c | 11 ++++++++--- net/bluetooth/l2cap_core.c | 6 ++++-- net/bluetooth/mgmt.c | 22 ++++++++++++++++++---- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 9bdffc4e79b0..47f3a45d7dcb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1174,7 +1174,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout) + u16 conn_timeout, + enum conn_reasons conn_reason) { struct hci_conn *conn; @@ -1219,6 +1220,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, conn->sec_level = BT_SECURITY_LOW; conn->pending_sec_level = sec_level; conn->conn_timeout = conn_timeout; + conn->conn_reason = conn_reason; hci_update_background_scan(hdev); @@ -1228,7 +1230,8 @@ done: } struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, - u8 sec_level, u8 auth_type) + u8 sec_level, u8 auth_type, + enum conn_reasons conn_reason) { struct hci_conn *acl; @@ -1248,6 +1251,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, hci_conn_hold(acl); + acl->conn_reason = conn_reason; if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; @@ -1264,7 +1268,8 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, struct hci_conn *acl; struct hci_conn *sco; - acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); + acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING, + CONN_REASON_SCO_CONNECT); if (IS_ERR(acl)) return acl; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fe913a5c754a..35d2bc569a2d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7893,11 +7893,13 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, else hcon = hci_connect_le_scan(hdev, dst, dst_type, chan->sec_level, - HCI_LE_CONN_TIMEOUT); + HCI_LE_CONN_TIMEOUT, + CONN_REASON_L2CAP_CHAN); } else { u8 auth_type = l2cap_get_auth_type(chan); - hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type); + hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type, + CONN_REASON_L2CAP_CHAN); } if (IS_ERR(hcon)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ec66160a673c..2a732cab1dc9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2931,7 +2931,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->addr.type == BDADDR_BREDR) { conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, - auth_type); + auth_type, CONN_REASON_PAIR_DEVICE); } else { u8 addr_type = le_addr_type(cp->addr.type); struct hci_conn_params *p; @@ -2950,9 +2950,9 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; - conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, - addr_type, sec_level, - HCI_LE_CONN_TIMEOUT); + conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, + sec_level, HCI_LE_CONN_TIMEOUT, + CONN_REASON_PAIR_DEVICE); } if (IS_ERR(conn)) { @@ -3053,6 +3053,20 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0, addr, sizeof(*addr)); + + /* Since user doesn't want to proceed with the connection, abort any + * ongoing pairing and then terminate the link if it was created + * because of the pair device action. + */ + if (addr->type == BDADDR_BREDR) + hci_remove_link_key(hdev, &addr->bdaddr); + else + smp_cancel_and_remove_pairing(hdev, &addr->bdaddr, + le_addr_type(addr->type)); + + if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + unlock: hci_dev_unlock(hdev); return err; -- cgit From 46605a271114f1243c807a55c607271c81e662d2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 17 Jun 2020 16:39:20 +0200 Subject: Bluetooth: mgmt: Use command complete on success for set system config The command status reply is only for failure. When completing set system config command, the reply has to be command complete. Signed-off-by: Marcel Holtmann Reviewed-by: Alain Michaud Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt_config.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c index 8e7ad2a51dbb..8d01a8ff85e9 100644 --- a/net/bluetooth/mgmt_config.c +++ b/net/bluetooth/mgmt_config.c @@ -247,9 +247,8 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, buffer += exp_len; } - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_SET_DEF_SYSTEM_CONFIG, - MGMT_STATUS_SUCCESS); + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_DEF_SYSTEM_CONFIG, 0, NULL, 0); } int read_def_runtime_config(struct sock *sk, struct hci_dev *hdev, void *data, -- cgit From 7c7982cbadbb63eb76401ddc4ef090cf7ae274b4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 17 Jun 2020 10:42:26 -0700 Subject: bpf: sk_storage: Prefer to get a free cache_idx The cache_idx is currently picked by RR. There is chance that the same cache_idx will be picked by multiple sk_storage_maps while other cache_idx is still unused. e.g. It could happen when the sk_storage_map is recreated during the restart of the user space process. This patch tracks the usage count for each cache_idx. There is 16 of them now (defined in BPF_SK_STORAGE_CACHE_SIZE). It will try to pick the free cache_idx. If none was found, it would pick one with the minimal usage count. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200617174226.2301909-1-kafai@fb.com --- net/core/bpf_sk_storage.c | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d2c4d16dadba..1dae4b543243 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -11,8 +11,6 @@ #include #include -static atomic_t cache_idx; - #define SK_STORAGE_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -81,6 +79,9 @@ struct bpf_sk_storage_elem { #define SDATA(_SELEM) (&(_SELEM)->sdata) #define BPF_SK_STORAGE_CACHE_SIZE 16 +static DEFINE_SPINLOCK(cache_idx_lock); +static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE]; + struct bpf_sk_storage { struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; struct hlist_head list; /* List of bpf_sk_storage_elem */ @@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } +static u16 cache_idx_get(void) +{ + u64 min_usage = U64_MAX; + u16 i, res = 0; + + spin_lock(&cache_idx_lock); + + for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) { + if (cache_idx_usage_counts[i] < min_usage) { + min_usage = cache_idx_usage_counts[i]; + res = i; + + /* Found a free cache_idx */ + if (!min_usage) + break; + } + } + cache_idx_usage_counts[res]++; + + spin_unlock(&cache_idx_lock); + + return res; +} + +static void cache_idx_free(u16 idx) +{ + spin_lock(&cache_idx_lock); + cache_idx_usage_counts[idx]--; + spin_unlock(&cache_idx_lock); +} + /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { @@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + cache_idx_free(smap->cache_idx); + /* Note that this map might be concurrently cloned from * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone * RCU read section to finish before proceeding. New RCU @@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) } smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; - smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % - BPF_SK_STORAGE_CACHE_SIZE; + smap->cache_idx = cache_idx_get(); return &smap->map; } -- cgit From 427d5838e99632e9218eae752009c873cade89ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 09:40:51 -0700 Subject: net: napi: remove useless stack trace Whenever a buggy NAPI driver returns more than its budget, we emit a stack trace that is of no use, since it does not tell which driver is buggy. Instead, emit a message giving the function name, and a descriptive message. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6bc2388141f6..cea7fc1716ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6683,7 +6683,9 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) trace_napi_poll(n, work, weight); } - WARN_ON_ONCE(work > weight); + if (unlikely(work > weight)) + pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n", + n->poll, work, weight); if (likely(work < weight)) goto out_unlock; -- cgit From 504b912150983a8b2499bbf9e4501336677404c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:24 -0700 Subject: net: tso: constify tso_count_descs() and friends skb argument of tso_count_descs(), tso_build_hdr() and tso_build_data() can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/tso.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/tso.c b/net/core/tso.c index d4d5c077ad72..56487e3bb26d 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -6,14 +6,14 @@ #include /* Calculate expected number of TX descriptors */ -int tso_count_descs(struct sk_buff *skb) +int tso_count_descs(const struct sk_buff *skb) { /* The Marvell Way */ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; } EXPORT_SYMBOL(tso_count_descs); -void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, +void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { struct tcphdr *tcph; @@ -44,7 +44,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, } EXPORT_SYMBOL(tso_build_hdr); -void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) +void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size) { tso->tcp_seq += size; tso->size -= size; -- cgit From 761b331cb6902dc0a08f786e9fa0dbd572059027 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:25 -0700 Subject: net: tso: cache transport header length Add tlen field into struct tso_t, and change tso_start() to return skb_transport_offset(skb) + tso->tlen This removes from callers the need to use tcp_hdrlen(skb) and will ease UDP segmentation offload addition. v2: calls tso_start() earlier in otx2_sq_append_tso() [Jakub] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/tso.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/tso.c b/net/core/tso.c index 56487e3bb26d..9f35518815bd 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -17,7 +17,7 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { struct tcphdr *tcph; - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int hdr_len = skb_transport_offset(skb) + tso->tlen; int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); @@ -30,7 +30,7 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, } else { struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); - iph->payload_len = htons(size + tcp_hdrlen(skb)); + iph->payload_len = htons(size + tso->tlen); } tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); put_unaligned_be32(tso->tcp_seq, &tcph->seq); @@ -62,10 +62,12 @@ void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size) } EXPORT_SYMBOL(tso_build_data); -void tso_start(struct sk_buff *skb, struct tso_t *tso) +int tso_start(struct sk_buff *skb, struct tso_t *tso) { - int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int tlen = tcp_hdrlen(skb); + int hdr_len = skb_transport_offset(skb) + tlen; + tso->tlen = tlen; tso->ip_id = ntohs(ip_hdr(skb)->id); tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); tso->next_frag_idx = 0; @@ -83,5 +85,6 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->data = skb_frag_address(frag); tso->next_frag_idx++; } + return hdr_len; } EXPORT_SYMBOL(tso_start); -- cgit From 3d5b459ba0e3788ab471e8cb98eee89964a9c5e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jun 2020 20:53:26 -0700 Subject: net: tso: add UDP segmentation support Note that like TCP, we do not support additional encapsulations, and that checksums must be offloaded to the NIC. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/tso.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/tso.c b/net/core/tso.c index 9f35518815bd..4148f6d48953 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -16,7 +16,6 @@ EXPORT_SYMBOL(tso_count_descs); void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct tcphdr *tcph; int hdr_len = skb_transport_offset(skb) + tso->tlen; int mac_hdr_len = skb_network_offset(skb); @@ -32,21 +31,29 @@ void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, iph->payload_len = htons(size + tso->tlen); } - tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - put_unaligned_be32(tso->tcp_seq, &tcph->seq); + hdr += skb_transport_offset(skb); + if (tso->tlen != sizeof(struct udphdr)) { + struct tcphdr *tcph = (struct tcphdr *)hdr; - if (!is_last) { - /* Clear all special flags for not last packet */ - tcph->psh = 0; - tcph->fin = 0; - tcph->rst = 0; + put_unaligned_be32(tso->tcp_seq, &tcph->seq); + + if (!is_last) { + /* Clear all special flags for not last packet */ + tcph->psh = 0; + tcph->fin = 0; + tcph->rst = 0; + } + } else { + struct udphdr *uh = (struct udphdr *)hdr; + + uh->len = htons(sizeof(*uh) + size); } } EXPORT_SYMBOL(tso_build_hdr); void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size) { - tso->tcp_seq += size; + tso->tcp_seq += size; /* not worth avoiding this operation for UDP */ tso->size -= size; tso->data += size; @@ -64,12 +71,12 @@ EXPORT_SYMBOL(tso_build_data); int tso_start(struct sk_buff *skb, struct tso_t *tso) { - int tlen = tcp_hdrlen(skb); + int tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr); int hdr_len = skb_transport_offset(skb) + tlen; tso->tlen = tlen; tso->ip_id = ntohs(ip_hdr(skb)->id); - tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); + tso->tcp_seq = (tlen != sizeof(struct udphdr)) ? ntohl(tcp_hdr(skb)->seq) : 0; tso->next_frag_idx = 0; tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); -- cgit From 4b61d3e8d3daebbde7ec02d593f84248fdf8bec2 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Fri, 19 Jun 2020 14:01:07 +0800 Subject: net: qos offload add flow status with dropped count This patch adds a drop frames counter to tc flower offloading. Reporting h/w dropped frames is necessary for some actions. Some actions like police action and the coming introduced stream gate action would produce dropped frames which is necessary for user. Status update shows how many filtered packets increasing and how many dropped in those packets. v2: Changes - Update commit comments suggest by Jiri Pirko. Signed-off-by: Po Liu Reviewed-by: Simon Horman Reviewed-by: Vlad Buslov Signed-off-by: David S. Miller --- net/sched/act_api.c | 10 ++++------ net/sched/act_ct.c | 6 +++--- net/sched/act_gact.c | 7 ++++--- net/sched/act_gate.c | 6 +++--- net/sched/act_mirred.c | 6 +++--- net/sched/act_pedit.c | 6 +++--- net/sched/act_police.c | 4 ++-- net/sched/act_skbedit.c | 5 +++-- net/sched/act_vlan.c | 6 +++--- net/sched/cls_flower.c | 1 + net/sched/cls_matchall.c | 3 ++- 11 files changed, 31 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8ac7eb0a8309..4c4466f18801 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1059,14 +1059,13 @@ err: return err; } -void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets, - bool drop, bool hw) +void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, bool hw) { if (a->cpu_bstats) { _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - if (drop) - this_cpu_ptr(a->cpu_qstats)->drops += packets; + this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), @@ -1075,8 +1074,7 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u32 packets, } _bstats_update(&a->tcfa_bstats, bytes, packets); - if (drop) - a->tcfa_qstats.drops += packets; + a->tcfa_qstats.drops += drops; if (hw) _bstats_update(&a->tcfa_bstats_hw, bytes, packets); } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e9f3576cbf71..1b9c6d4a1b6b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1450,12 +1450,12 @@ static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } -static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_ct *c = to_ct(a); - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 416065772719..410e3bbfb9ca 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -171,14 +171,15 @@ static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, return action; } -static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - tcf_action_update_stats(a, bytes, packets, action == TC_ACT_SHOT, hw); + tcf_action_update_stats(a, bytes, packets, + action == TC_ACT_SHOT ? packets : drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 9c628591f452..c818844846b1 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -568,13 +568,13 @@ static int tcf_gate_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_gate *gact = to_gate(a); struct tcf_t *tm = &gact->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 83dd82fc9f40..b2705318993b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -312,13 +312,13 @@ out: return retval; } -static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_mirred *m = to_mirred(a); struct tcf_t *tm = &m->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index d41d6200d9de..66986db062ed 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -409,13 +409,13 @@ done: return p->tcf_action; } -static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_pedit *d = to_pedit(a); struct tcf_t *tm = &d->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8b7a0ac96c51..0b431d493768 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -288,13 +288,13 @@ static void tcf_police_cleanup(struct tc_action *a) } static void tcf_police_stats_update(struct tc_action *a, - u64 bytes, u32 packets, + u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { struct tcf_police *police = to_police(a); struct tcf_t *tm = &police->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index b125b2be4467..361b863e0634 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -74,12 +74,13 @@ err: } static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes, - u32 packets, u64 lastuse, bool hw) + u64 packets, u64 drops, + u64 lastuse, bool hw) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_t *tm = &d->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index c91d3958fcbb..a5ff9f68ab02 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -302,13 +302,13 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } -static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets, - u64 lastuse, bool hw) +static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, + u64 drops, u64 lastuse, bool hw) { struct tcf_vlan *v = to_vlan(a); struct tcf_t *tm = &v->tcf_tm; - tcf_action_update_stats(a, bytes, packets, false, hw); + tcf_action_update_stats(a, bytes, packets, drops, hw); tm->lastuse = max_t(u64, tm->lastuse, lastuse); } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b2da37286082..391971672d54 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -491,6 +491,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, + cls_flower.stats.drops, cls_flower.stats.lastused, cls_flower.stats.used_hw_stats, cls_flower.stats.used_hw_stats_valid); diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 8d39dbcf1746..cafb84480bab 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -338,7 +338,8 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, - cls_mall.stats.pkts, cls_mall.stats.lastused, + cls_mall.stats.pkts, cls_mall.stats.drops, + cls_mall.stats.lastused, cls_mall.stats.used_hw_stats, cls_mall.stats.used_hw_stats_valid); } -- cgit From cc7a21b6fbd945f8d8f61422ccd27203c1fafeb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:02:59 -0700 Subject: ipv6: icmp6: avoid indirect call for icmpv6_send() If IPv6 is builtin, we do not need an expensive indirect call to reach icmp6_send(). v2: put inline keyword before the type to avoid sparse warnings. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 5 +++-- net/ipv6/ip6_icmp.c | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fc5000370030..91e0f2fd2523 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -439,8 +439,8 @@ static int icmp6_iif(const struct sk_buff *skb) /* * Send an ICMP message in response to a packet in error */ -static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr) +void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct in6_addr *force_saddr) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -625,6 +625,7 @@ out: out_bh_enable: local_bh_enable(); } +EXPORT_SYMBOL(icmp6_send); /* Slightly more convenient version of icmp6_send. */ diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index e0086758b6ee..70c8c2f36c98 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -9,6 +9,8 @@ #if IS_ENABLED(CONFIG_IPV6) +#if !IS_BUILTIN(CONFIG_IPV6) + static ip6_icmp_send_t __rcu *ip6_icmp_send; int inet6_register_icmp_sender(ip6_icmp_send_t *fn) @@ -37,14 +39,12 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) rcu_read_lock(); send = rcu_dereference(ip6_icmp_send); - - if (!send) - goto out; - send(skb, type, code, info, NULL); -out: + if (send) + send(skb, type, code, info, NULL); rcu_read_unlock(); } EXPORT_SYMBOL(icmpv6_send); +#endif #if IS_ENABLED(CONFIG_NF_NAT) #include -- cgit From e034c6d23bc43266af1fa983212218f4aa38f995 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Jun 2020 08:35:00 -0500 Subject: tipc: Use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/tipc/link.c | 8 ++++---- net/tipc/msg.h | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index eac89a3e22ce..1c579357ccdf 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1385,12 +1385,12 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ - if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) { + if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; /* Backward compatible: peer might not support bc, but uc? */ - } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) { + } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { if (p->ugack_cnt) { p->bgack_cnt = 0; goto ok; @@ -1472,7 +1472,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ - len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt); + len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt); ga->len = htons(len); return len; } @@ -1521,7 +1521,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, gacks = &ga->gacks[ga->bgack_cnt]; } else if (ga) { /* Copy the Gap ACKs, bc part, for later renewal if needed */ - this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt), + this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), GFP_ATOMIC); if (likely(this_ga)) { this_ga->start_index = 0; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 65119e81ff0c..1016e96db5c4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -189,11 +189,9 @@ struct tipc_gap_ack_blks { struct tipc_gap_ack gacks[]; }; -#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \ - sizeof(struct tipc_gap_ack) * (n)) - #define MAX_GAP_ACK_BLKS 128 -#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS) +#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS) static inline struct tipc_msg *buf_msg(struct sk_buff *skb) { -- cgit From 11a33de2df06b4ed23844181cb5c03eea67bfed1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Jun 2020 09:46:48 -0500 Subject: taprio: Use struct_size() in kzalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. Also, remove unnecessary variable _size_. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b1eb12d33b9a..e981992634dd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1108,11 +1108,10 @@ static void setup_txtime(struct taprio_sched *q, static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) { - size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries + - sizeof(struct __tc_taprio_qopt_offload); struct __tc_taprio_qopt_offload *__offload; - __offload = kzalloc(size, GFP_KERNEL); + __offload = kzalloc(struct_size(__offload, offload.entries, num_entries), + GFP_KERNEL); if (!__offload) return NULL; -- cgit From c5eb179edd8a169949b1fb153ecfd04649a71c8d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Jun 2020 09:53:42 -0500 Subject: net/sched: cls_u32: Use struct_size() in kzalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index e15ff335953d..771b068f8254 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -796,9 +796,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, struct tc_u32_sel *s = &n->sel; struct tc_u_knode *new; - new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), - GFP_KERNEL); - + new = kzalloc(struct_size(new, sel.keys, s->nkeys), GFP_KERNEL); if (!new) return NULL; -- cgit From 49042c220b3a31e25902b36df71b23dc10efa0b8 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Sat, 20 Jun 2020 00:54:43 +0200 Subject: l3mdev: add infrastructure for table to VRF mapping Add infrastructure to l3mdev (the core code for Layer 3 master devices) in order to find out the corresponding VRF device for a given table id. Therefore, the l3mdev implementations: - can register a callback that returns the device index of the l3mdev associated with a given table id; - can offer the lookup function (table to VRF device). Signed-off-by: Andrea Mayer Signed-off-by: David S. Miller --- net/l3mdev/l3mdev.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'net') diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index f35899d45a9a..e71ca5aec684 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -9,6 +9,99 @@ #include #include +static DEFINE_SPINLOCK(l3mdev_lock); + +struct l3mdev_handler { + lookup_by_table_id_t dev_lookup; +}; + +static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1]; + +static int l3mdev_check_type(enum l3mdev_type l3type) +{ + if (l3type <= L3MDEV_TYPE_UNSPEC || l3type > L3MDEV_TYPE_MAX) + return -EINVAL; + + return 0; +} + +int l3mdev_table_lookup_register(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ + struct l3mdev_handler *hdlr; + int res; + + res = l3mdev_check_type(l3type); + if (res) + return res; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + if (hdlr->dev_lookup) { + res = -EBUSY; + goto unlock; + } + + hdlr->dev_lookup = fn; + res = 0; + +unlock: + spin_unlock(&l3mdev_lock); + + return res; +} +EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register); + +void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, + lookup_by_table_id_t fn) +{ + struct l3mdev_handler *hdlr; + + if (l3mdev_check_type(l3type)) + return; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + if (hdlr->dev_lookup == fn) + hdlr->dev_lookup = NULL; + + spin_unlock(&l3mdev_lock); +} +EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister); + +int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, + struct net *net, u32 table_id) +{ + lookup_by_table_id_t lookup; + struct l3mdev_handler *hdlr; + int ifindex = -EINVAL; + int res; + + res = l3mdev_check_type(l3type); + if (res) + return res; + + hdlr = &l3mdev_handlers[l3type]; + + spin_lock(&l3mdev_lock); + + lookup = hdlr->dev_lookup; + if (!lookup) + goto unlock; + + ifindex = lookup(net, table_id); + +unlock: + spin_unlock(&l3mdev_lock); + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id); + /** * l3mdev_master_ifindex - get index of L3 master device * @dev: targeted interface -- cgit From 8eaf8d99409009b7ab7853f3ac603928458c2022 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Thu, 18 Jun 2020 16:36:31 -0400 Subject: Remove redundant condition in qdisc_graft parent cannot be NULL here since its in the else part of the if (parent == NULL) condition. Remove the extra check on parent pointer. Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9a3449b56bd6..11ebba60da3b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1093,8 +1093,7 @@ skip: int err; /* Only support running class lockless if parent is lockless */ - if (new && (new->flags & TCQ_F_NOLOCK) && - parent && !(parent->flags & TCQ_F_NOLOCK)) + if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK)) qdisc_clear_nolock(new); if (!cops || !cops->graft) -- cgit From 05e22e8395058745bd0312bc488b522197852aff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:12:34 -0700 Subject: tcp: remove indirect calls for icsk->icsk_af_ops->queue_xmit Mitigate RETPOLINE costs in __tcp_transmit_skb() by using INDIRECT_CALL_INET() wrapper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 6 ++++++ net/ipv4/tcp_output.c | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 090d3097ee15..d946356187ed 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -539,6 +539,12 @@ no_route: } EXPORT_SYMBOL(__ip_queue_xmit); +int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) +{ + return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos); +} +EXPORT_SYMBOL(ip_queue_xmit); + static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = from->pkt_type; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a50e1990a845..be1bd37185d8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1064,6 +1064,9 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); } +INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); +INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -1235,7 +1238,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcp_add_tx_delay(skb, tp); - err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); + err = INDIRECT_CALL_INET(icsk->icsk_af_ops->queue_xmit, + inet6_csk_xmit, ip_queue_xmit, + sk, skb, &inet->cork.fl); if (unlikely(err > 0)) { tcp_enter_cwr(sk); -- cgit From dd2e0b86fc4ee146ac8f3275833d0187efeb950a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 19 Jun 2020 12:12:35 -0700 Subject: tcp: remove indirect calls for icsk->icsk_af_ops->send_check Mitigate RETPOLINE costs in __tcp_transmit_skb() by using INDIRECT_CALL_INET() wrapper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 5 ++++- net/ipv6/tcp_ipv6.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index be1bd37185d8..04b70fe31fa2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1066,6 +1066,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); +INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)); /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial @@ -1210,7 +1211,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, } #endif - icsk->icsk_af_ops->send_check(sk, skb); + INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check, + tcp_v6_send_check, tcp_v4_send_check, + sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f67d45ff00b4..4502db706f75 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1811,6 +1811,13 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor = tcp_twsk_destructor, }; +INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr); +} + const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit From 8bf1539515920810b86cf1783dc433b1a25d31df Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Fri, 19 Jun 2020 15:24:13 -0400 Subject: Remove redundant skb null check Remove the redundant null check for skb. Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4c4466f18801..063d8aaf2900 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1473,7 +1473,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; - u32 portid = skb ? NETLINK_CB(skb).portid : 0; + u32 portid = NETLINK_CB(skb).portid; int ret = 0, ovr = 0; if ((n->nlmsg_type != RTM_GETACTION) && -- cgit From 3ca33e3fb4f919b66a72145a87bfeada079e750d Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Fri, 19 Jun 2020 17:10:24 -0700 Subject: Bluetooth: Add hci_dev_lock to get/set device flags Adding hci_dev_lock since hci_conn_params_(lookup|add) require this lock. Suggested-by: Miao-chen Chou Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2a732cab1dc9..5e9b9728eeac 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3895,6 +3895,8 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "Get device flags %pMR (type 0x%x)\n", &cp->addr.bdaddr, cp->addr.type); + hci_dev_lock(hdev); + if (cp->addr.type == BDADDR_BREDR) { br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &cp->addr.bdaddr, @@ -3921,6 +3923,8 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, status = MGMT_STATUS_SUCCESS; done: + hci_dev_unlock(hdev); + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_DEVICE_FLAGS, status, &rp, sizeof(rp)); } @@ -3959,6 +3963,8 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } + hci_dev_lock(hdev); + if (cp->addr.type == BDADDR_BREDR) { br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &cp->addr.bdaddr, @@ -3985,6 +3991,8 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, } done: + hci_dev_unlock(hdev); + if (status == MGMT_STATUS_SUCCESS) device_flags_changed(sk, hdev, &cp->addr.bdaddr, cp->addr.type, supported_flags, current_flags); -- cgit From 5cbd3ebde859bd43bd0584c146060638b1a3abb4 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 22 Jun 2020 13:30:28 +0000 Subject: Bluetooth: use configured params for ext adv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the extended advertisement feature is enabled, a hardcoded min and max interval of 0x8000 is used. This patch fixes this issue by using the configured min/max value. This was validated by setting min/max in main.conf and making sure the right setting is applied: < HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25 #93 [hci0] 10.953011 … Min advertising interval: 181.250 msec (0x0122) Max advertising interval: 181.250 msec (0x0122) … Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Daniel Winkler Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 29decd7e8051..86ae4b953a01 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1799,8 +1799,6 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) int err; struct adv_info *adv_instance; bool secondary_adv; - /* In ext adv set param interval is 3 octets */ - const u8 adv_interval[3] = { 0x00, 0x08, 0x00 }; if (instance > 0) { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1833,8 +1831,9 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) memset(&cp, 0, sizeof(cp)); - memcpy(cp.min_interval, adv_interval, sizeof(cp.min_interval)); - memcpy(cp.max_interval, adv_interval, sizeof(cp.max_interval)); + /* In ext adv set param interval is 3 octets */ + hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); + hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); -- cgit From 8746f135bb01872ff412d408ea1aa9ebd328c1f5 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 20 May 2020 14:20:14 -0700 Subject: Bluetooth: Disconnect if E0 is used for Level 4 E0 is not allowed with Level 4: BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C page 1319: '128-bit equivalent strength for link and encryption keys required using FIPS approved algorithms (E0 not allowed, SAFER+ not allowed, and P-192 not allowed; encryption key not shortened' SC enabled: > HCI Event: Read Remote Extended Features (0x23) plen 13 Status: Success (0x00) Handle: 256 Page: 1/2 Features: 0x0b 0x00 0x00 0x00 0x00 0x00 0x00 0x00 Secure Simple Pairing (Host Support) LE Supported (Host) Secure Connections (Host Support) > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 256 Encryption: Enabled with AES-CCM (0x02) SC disabled: > HCI Event: Read Remote Extended Features (0x23) plen 13 Status: Success (0x00) Handle: 256 Page: 1/2 Features: 0x03 0x00 0x00 0x00 0x00 0x00 0x00 0x00 Secure Simple Pairing (Host Support) LE Supported (Host) > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 256 Encryption: Enabled with E0 (0x01) [May 8 20:23] Bluetooth: hci0: Invalid security: expect AES but E0 was used < HCI Command: Disconnect (0x01|0x0006) plen 3 Handle: 256 Reason: Authentication Failure (0x05) Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 17 +++++++++++++++++ net/bluetooth/hci_event.c | 20 ++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 47f3a45d7dcb..8805d68e65f2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1322,6 +1322,23 @@ int hci_conn_check_link_mode(struct hci_conn *conn) return 0; } + /* AES encryption is required for Level 4: + * + * BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C + * page 1319: + * + * 128-bit equivalent strength for link and encryption keys + * required using FIPS approved algorithms (E0 not allowed, + * SAFER+ not allowed, and P-192 not allowed; encryption key + * not shortened) + */ + if (conn->sec_level == BT_SECURITY_FIPS && + !test_bit(HCI_CONN_AES_CCM, &conn->flags)) { + bt_dev_err(conn->hdev, + "Invalid security: Missing AES-CCM usage"); + return 0; + } + if (hci_conn_ssp_enabled(conn) && !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e08d4dd9a24e..e060fc9ebb18 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3065,27 +3065,23 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); + /* Check link security requirements are met */ + if (!hci_conn_check_link_mode(conn)) + ev->status = HCI_ERROR_AUTH_FAILURE; + if (ev->status && conn->state == BT_CONNECTED) { if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING) set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags); + /* Notify upper layers so they can cleanup before + * disconnecting. + */ + hci_encrypt_cfm(conn, ev->status); hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); hci_conn_drop(conn); goto unlock; } - /* In Secure Connections Only mode, do not allow any connections - * that are not encrypted with AES-CCM using a P-256 authenticated - * combination key. - */ - if (hci_dev_test_flag(hdev, HCI_SC_ONLY) && - (!test_bit(HCI_CONN_AES_CCM, &conn->flags) || - conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) { - hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE); - hci_conn_drop(conn); - goto unlock; - } - /* Try reading the encryption key size for encrypted ACL links */ if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { struct hci_cp_read_enc_key_size cp; -- cgit From 032a6b3565489a26d6841eefa1fc29d95fc80c66 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:42 -0700 Subject: bpf: Rename bpf_htab to bpf_shtab in sock_map There are two different `struct bpf_htab` in bpf code in the following files: - kernel/bpf/hashtab.c - net/core/sock_map.c It makes it impossible to find proper btf_id by name = "bpf_htab" and kind = BTF_KIND_STRUCT what is needed to support access to map ptr so that bpf program can access `struct bpf_htab` fields. To make it possible one of the struct-s should be renamed, sock_map.c looks like a better candidate for rename since it's specialized version of hashtab. Rename it to bpf_shtab ("sh" stands for Sock Hash). Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/c006a639e03c64ca50fc87c4bb627e0bfba90f4e.1592600985.git.rdna@fb.com --- net/core/sock_map.c | 82 ++++++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4059f94e9bb5..2b884f2d562a 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -655,7 +655,7 @@ const struct bpf_map_ops sock_map_ops = { .map_check_btf = map_check_no_btf, }; -struct bpf_htab_elem { +struct bpf_shtab_elem { struct rcu_head rcu; u32 hash; struct sock *sk; @@ -663,14 +663,14 @@ struct bpf_htab_elem { u8 key[]; }; -struct bpf_htab_bucket { +struct bpf_shtab_bucket { struct hlist_head head; raw_spinlock_t lock; }; -struct bpf_htab { +struct bpf_shtab { struct bpf_map map; - struct bpf_htab_bucket *buckets; + struct bpf_shtab_bucket *buckets; u32 buckets_num; u32 elem_size; struct sk_psock_progs progs; @@ -682,17 +682,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len) return jhash(key, len, 0); } -static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab, - u32 hash) +static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab, + u32 hash) { return &htab->buckets[hash & (htab->buckets_num - 1)]; } -static struct bpf_htab_elem * +static struct bpf_shtab_elem * sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, u32 key_size) { - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; hlist_for_each_entry_rcu(elem, head, node) { if (elem->hash == hash && @@ -705,10 +705,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; WARN_ON_ONCE(!rcu_read_lock_held()); @@ -719,8 +719,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) return elem ? elem->sk : NULL; } -static void sock_hash_free_elem(struct bpf_htab *htab, - struct bpf_htab_elem *elem) +static void sock_hash_free_elem(struct bpf_shtab *htab, + struct bpf_shtab_elem *elem) { atomic_dec(&htab->count); kfree_rcu(elem, rcu); @@ -729,9 +729,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab, static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, void *link_raw) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem_probe, *elem = link_raw; - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem_probe, *elem = link_raw; + struct bpf_shtab_bucket *bucket; WARN_ON_ONCE(!rcu_read_lock_held()); bucket = sock_hash_select_bucket(htab, elem->hash); @@ -753,10 +753,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, static int sock_hash_delete_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; int ret = -ENOENT; hash = sock_hash_bucket_hash(key, key_size); @@ -774,12 +774,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) return ret; } -static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, - void *key, u32 key_size, - u32 hash, struct sock *sk, - struct bpf_htab_elem *old) +static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab, + void *key, u32 key_size, + u32 hash, struct sock *sk, + struct bpf_shtab_elem *old) { - struct bpf_htab_elem *new; + struct bpf_shtab_elem *new; if (atomic_inc_return(&htab->count) > htab->map.max_entries) { if (!old) { @@ -803,10 +803,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_elem *elem, *elem_new; - struct bpf_htab_bucket *bucket; + struct bpf_shtab_elem *elem, *elem_new; + struct bpf_shtab_bucket *bucket; struct sk_psock_link *link; struct sk_psock *psock; int ret; @@ -916,8 +916,8 @@ out: static int sock_hash_get_next_key(struct bpf_map *map, void *key, void *key_next) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem, *elem_next; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem, *elem_next; u32 hash, key_size = map->key_size; struct hlist_head *head; int i = 0; @@ -931,7 +931,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, goto find_first_elem; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -943,7 +943,7 @@ find_first_elem: for (; i < htab->buckets_num; i++) { head = &sock_hash_select_bucket(htab, i)->head; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -955,7 +955,7 @@ find_first_elem: static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) { - struct bpf_htab *htab; + struct bpf_shtab *htab; int i, err; u64 cost; @@ -977,15 +977,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&htab->map, attr); htab->buckets_num = roundup_pow_of_two(htab->map.max_entries); - htab->elem_size = sizeof(struct bpf_htab_elem) + + htab->elem_size = sizeof(struct bpf_shtab_elem) + round_up(htab->map.key_size, 8); if (htab->buckets_num == 0 || - htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) { + htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) { err = -EINVAL; goto free_htab; } - cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) + + cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) + (u64) htab->elem_size * htab->map.max_entries; if (cost >= U32_MAX - PAGE_SIZE) { err = -EINVAL; @@ -996,7 +996,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; htab->buckets = bpf_map_area_alloc(htab->buckets_num * - sizeof(struct bpf_htab_bucket), + sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) { bpf_map_charge_finish(&htab->map.memory); @@ -1017,10 +1017,10 @@ free_htab: static void sock_hash_free(struct bpf_map *map) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_bucket *bucket; struct hlist_head unlink_list; - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; struct hlist_node *node; int i; @@ -1096,7 +1096,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key) static void sock_hash_release_progs(struct bpf_map *map) { - psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs); + psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs); } BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops, @@ -1194,7 +1194,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) case BPF_MAP_TYPE_SOCKMAP: return &container_of(map, struct bpf_stab, map)->progs; case BPF_MAP_TYPE_SOCKHASH: - return &container_of(map, struct bpf_htab, map)->progs; + return &container_of(map, struct bpf_shtab, map)->progs; default: break; } -- cgit From 2872e9ac33a4440173418147351ed4f93177e763 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:44 -0700 Subject: bpf: Set map_btf_{name, id} for all map types Set map_btf_name and map_btf_id for all map types so that map fields can be accessed by bpf programs. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/a825f808f22af52b018dbe82f1c7d29dab5fc978.1592600985.git.rdna@fb.com --- net/core/bpf_sk_storage.c | 3 +++ net/core/sock_map.c | 6 ++++++ net/xdp/xskmap.c | 3 +++ 3 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 1dae4b543243..6f921c4ddc2c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -919,6 +919,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } +static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { .map_alloc_check = bpf_sk_storage_map_alloc_check, .map_alloc = bpf_sk_storage_map_alloc, @@ -928,6 +929,8 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_sk_storage_map_check_btf, + .map_btf_name = "bpf_sk_storage_map", + .map_btf_id = &sk_storage_map_btf_id, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 2b884f2d562a..4c1123c749bb 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -643,6 +643,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_map_btf_id; const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -653,6 +654,8 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stab", + .map_btf_id = &sock_map_btf_id, }; struct bpf_shtab_elem { @@ -1176,6 +1179,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_hash_map_btf_id; const struct bpf_map_ops sock_hash_ops = { .map_alloc = sock_hash_alloc, .map_free = sock_hash_free, @@ -1186,6 +1190,8 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_shtab", + .map_btf_id = &sock_hash_map_btf_id, }; static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 1dc7208c71ba..8367adbbe9df 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -254,6 +254,7 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, spin_unlock_bh(&map->lock); } +static int xsk_map_btf_id; const struct bpf_map_ops xsk_map_ops = { .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, @@ -264,4 +265,6 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "xsk_map", + .map_btf_id = &xsk_map_btf_id, }; -- cgit From a829eb0d5dc5415bef380cf53e09a0123e716951 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:47 +0000 Subject: net/devlink: Prepare devlink port functions to fill extack Prepare devlink port related functions to optionally fill up the extack information which will be used in subsequent patch by port function attribute(s). Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 2cafbc808b09..05197631d52a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -566,7 +566,8 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_port *devlink_port, enum devlink_command cmd, u32 portid, - u32 seq, int flags) + u32 seq, int flags, + struct netlink_ext_ack *extack) { void *hdr; @@ -634,7 +635,8 @@ static void devlink_port_notify(struct devlink_port *devlink_port, if (!msg) return; - err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0); + err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0, + NULL); if (err) { nlmsg_free(msg); return; @@ -708,7 +710,8 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, err = devlink_nl_port_fill(msg, devlink, devlink_port, DEVLINK_CMD_PORT_NEW, - info->snd_portid, info->snd_seq, 0); + info->snd_portid, info->snd_seq, 0, + info->extack); if (err) { nlmsg_free(msg); return err; @@ -740,7 +743,8 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI); + NLM_F_MULTI, + cb->extack); if (err) { mutex_unlock(&devlink->lock); goto out; -- cgit From 2a916ecc405686c1d86f632281bc06aa75ebae4e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:48 +0000 Subject: net/devlink: Support querying hardware address of port function PCI PF and VF devlink port can manage the function represented by a devlink port. Enable users to query port function's hardware address. Example of a PCI VF port which supports a port function: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:11:22:33:44:66 $ devlink port show pci/0000:06:00.0/2 -jp { "port": { "pci/0000:06:00.0/2": { "type": "eth", "netdev": "enp6s0pf0vf1", "flavour": "pcivf", "pfnum": 0, "vfnum": 1, "function": { "hw_addr": "00:11:22:33:44:66" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 05197631d52a..b6848b607e9c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -563,6 +563,49 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } +static int +devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, + struct netlink_ext_ack *extack) +{ + struct devlink *devlink = port->devlink; + const struct devlink_ops *ops; + struct nlattr *function_attr; + bool empty_nest = true; + int err = 0; + + function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION); + if (!function_attr) + return -EMSGSIZE; + + ops = devlink->ops; + if (ops->port_function_hw_addr_get) { + int uninitialized_var(hw_addr_len); + u8 hw_addr[MAX_ADDR_LEN]; + + err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + if (err == -EOPNOTSUPP) { + /* Port function attributes are optional for a port. If port doesn't + * support function attribute, returning -EOPNOTSUPP is not an error. + */ + err = 0; + goto out; + } else if (err) { + goto out; + } + err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); + if (err) + goto out; + empty_nest = false; + } + +out: + if (err || empty_nest) + nla_nest_cancel(msg, function_attr); + else + nla_nest_end(msg, function_attr); + return err; +} + static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_port *devlink_port, enum devlink_command cmd, u32 portid, @@ -608,6 +651,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, spin_unlock_bh(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; + if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; -- cgit From a1e8ae907c8d67f57432190bb742802a76516b00 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 19 Jun 2020 03:32:49 +0000 Subject: net/devlink: Support setting hardware address of port function PCI PF and VF devlink port can manage the function represented by a devlink port. Allow users to set port function's hardware address. Example of a PCI VF port which supports a port function: $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55 $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:11:22:33:44:55 Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index b6848b607e9c..baa45eca6b5a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -85,6 +85,10 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); +static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, +}; + static LIST_HEAD(devlink_list); /* devlink_mutex @@ -827,6 +831,67 @@ static int devlink_port_type_set(struct devlink *devlink, return -EOPNOTSUPP; } +static int +devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, + const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops; + const u8 *hw_addr; + int hw_addr_len; + int err; + + hw_addr = nla_data(attr); + hw_addr_len = nla_len(attr); + if (hw_addr_len > MAX_ADDR_LEN) { + NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long"); + return -EINVAL; + } + if (port->type == DEVLINK_PORT_TYPE_ETH) { + if (hw_addr_len != ETH_ALEN) { + NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device"); + return -EINVAL; + } + if (!is_unicast_ether_addr(hw_addr)) { + NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported"); + return -EINVAL; + } + } + + ops = devlink->ops; + if (!ops->port_function_hw_addr_set) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); + return -EOPNOTSUPP; + } + + err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + if (err) + return err; + + devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); + return 0; +} + +static int +devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, + const struct nlattr *attr, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr, + devlink_function_nl_policy, extack); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes"); + return err; + } + + attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; + if (attr) + err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + + return err; +} + static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { @@ -842,6 +907,16 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, if (err) return err; } + + if (info->attrs[DEVLINK_ATTR_PORT_FUNCTION]) { + struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; + struct netlink_ext_ack *extack = info->extack; + + err = devlink_port_function_set(devlink, devlink_port, attr, extack); + if (err) + return err; + } + return 0; } @@ -6758,6 +6833,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, + [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, }; static const struct genl_ops devlink_nl_ops[] = { -- cgit From 272c2330adc9c68284cb0066719160c24bfe605f Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 19 Jun 2020 10:31:52 -0400 Subject: xfrm: bail early on slave pass over skb This is prep work for initial support of bonding hardware encryption pass-through support. The bonding driver will fill in the slave_dev pointer, and we use that to know not to skb_push() again on a given skb that was already processed on the bond device. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org CC: intel-wired-lan@lists.osuosl.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- net/xfrm/xfrm_device.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index f50d1f97cf8e..b8918fc5248b 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -106,6 +106,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur struct sk_buff *skb2, *nskb, *pskb = NULL; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); + struct net_device *dev = skb->dev; struct sec_path *sp; if (!xo) @@ -119,6 +120,10 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) return skb; + /* This skb was already validated on the master dev */ + if ((x->xso.dev != dev) && (x->xso.slave_dev == dev)) + return skb; + local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); err = !skb_queue_empty(&sd->xfrm_backlog); @@ -129,25 +134,20 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb)) { - struct net_device *dev = skb->dev; - - if (unlikely(x->xso.dev != dev)) { - struct sk_buff *segs; + if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + struct sk_buff *segs; - /* Packet got rerouted, fixup features and segment it. */ - esp_features = esp_features & ~(NETIF_F_HW_ESP - | NETIF_F_GSO_ESP); + /* Packet got rerouted, fixup features and segment it. */ + esp_features = esp_features & ~(NETIF_F_HW_ESP | NETIF_F_GSO_ESP); - segs = skb_gso_segment(skb, esp_features); - if (IS_ERR(segs)) { - kfree_skb(skb); - atomic_long_inc(&dev->tx_dropped); - return NULL; - } else { - consume_skb(skb); - skb = segs; - } + segs = skb_gso_segment(skb, esp_features); + if (IS_ERR(segs)) { + kfree_skb(skb); + atomic_long_inc(&dev->tx_dropped); + return NULL; + } else { + consume_skb(skb); + skb = segs; } } -- cgit From b5872cd0e823e4cb50b3a75cd9522167eeb676a2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 20 Jun 2020 22:01:56 +0530 Subject: devlink: Add support for board.serial_number to info_get cb. Board serial number is a serial number, often available in PCI *Vital Product Data*. Also, update devlink-info.rst documentation file. Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Vasundhara Volam Reviewed-by: Michael Chan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index baa45eca6b5a..455998a57671 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4502,6 +4502,14 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) } EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); +int devlink_info_board_serial_number_put(struct devlink_info_req *req, + const char *bsn) +{ + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER, + bsn); +} +EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put); + static int devlink_info_version_put(struct devlink_info_req *req, int attr, const char *version_name, const char *version_value) -- cgit From bd869245a3dcca1c8a77dfade7d3dc69a68365df Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 20 Jun 2020 22:35:42 +0200 Subject: net: core: try to runtime-resume detached device in __dev_open A netdevice may be marked as detached because the parent is runtime-suspended and not accessible whilst interface or link is down. An example are PCI network devices that go into PCI D3hot, see e.g. __igc_shutdown() or rtl8169_net_suspend(). If netdevice is down and marked as detached we can only open it if we runtime-resume it before __dev_open() calls netif_device_present(). Therefore, if netdevice is detached, try to runtime-resume the parent and only return with an error if it's still detached. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- net/core/dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cea7fc1716ca..c5ec4d50acd1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -143,6 +143,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -1492,8 +1493,13 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) ASSERT_RTNL(); - if (!netif_device_present(dev)) - return -ENODEV; + if (!netif_device_present(dev)) { + /* may be detached because parent is runtime-suspended */ + if (dev->dev.parent) + pm_runtime_resume(dev->dev.parent); + if (!netif_device_present(dev)) + return -ENODEV; + } /* Block netpoll from trying to do any rx path servicing. * If we don't do this there is a chance ndo_poll_controller -- cgit From c5efcf17bf84b20dd76ca56de6dd99a40a89c4e3 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Sun, 21 Jun 2020 22:24:30 -0400 Subject: tcindex_change: Remove redundant null check arg cannot be NULL since its already being dereferenced before. Remove the redundant NULL check. Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 61e95029c18f..78bec347b8b6 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -533,7 +533,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," "p %p,r %p,*arg %p\n", - tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL); + tp, handle, tca, arg, opt, p, r, *arg); if (!opt) return 0; -- cgit From 29cb9868fb69582da3205cf4a5eb5dc8f740ed33 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 23 Jun 2020 13:43:06 +1000 Subject: net/core/devlink.c: remove new uninitialized_var() usage Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 455998a57671..6ae36808c152 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -583,7 +583,7 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por ops = devlink->ops; if (ops->port_function_hw_addr_get) { - int uninitialized_var(hw_addr_len); + int hw_addr_len; u8 hw_addr[MAX_ADDR_LEN]; err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); -- cgit From f9c70bdc279b191da8d60777c627702c06e4a37d Mon Sep 17 00:00:00 2001 From: Lihong Kou Date: Tue, 23 Jun 2020 20:28:41 +0800 Subject: Bluetooth: add a mutex lock to avoid UAF in do_enale_set In the case we set or free the global value listen_chan in different threads, we can encounter the UAF problems because the method is not protected by any lock, add one to avoid this bug. BUG: KASAN: use-after-free in l2cap_chan_close+0x48/0x990 net/bluetooth/l2cap_core.c:730 Read of size 8 at addr ffff888096950000 by task kworker/1:102/2868 CPU: 1 PID: 2868 Comm: kworker/1:102 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events do_enable_set Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1fb/0x318 lib/dump_stack.c:118 print_address_description+0x74/0x5c0 mm/kasan/report.c:374 __kasan_report+0x149/0x1c0 mm/kasan/report.c:506 kasan_report+0x26/0x50 mm/kasan/common.c:641 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 l2cap_chan_close+0x48/0x990 net/bluetooth/l2cap_core.c:730 do_enable_set+0x660/0x900 net/bluetooth/6lowpan.c:1074 process_one_work+0x7f5/0x10f0 kernel/workqueue.c:2264 worker_thread+0xbbc/0x1630 kernel/workqueue.c:2410 kthread+0x332/0x350 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 2870: save_stack mm/kasan/common.c:72 [inline] set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc+0x118/0x1c0 mm/kasan/common.c:515 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:529 kmem_cache_alloc_trace+0x221/0x2f0 mm/slab.c:3551 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] l2cap_chan_create+0x50/0x320 net/bluetooth/l2cap_core.c:446 chan_create net/bluetooth/6lowpan.c:640 [inline] bt_6lowpan_listen net/bluetooth/6lowpan.c:959 [inline] do_enable_set+0x6a4/0x900 net/bluetooth/6lowpan.c:1078 process_one_work+0x7f5/0x10f0 kernel/workqueue.c:2264 worker_thread+0xbbc/0x1630 kernel/workqueue.c:2410 kthread+0x332/0x350 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Freed by task 2870: save_stack mm/kasan/common.c:72 [inline] set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:337 [inline] __kasan_slab_free+0x12e/0x1e0 mm/kasan/common.c:476 kasan_slab_free+0xe/0x10 mm/kasan/common.c:485 __cache_free mm/slab.c:3426 [inline] kfree+0x10d/0x220 mm/slab.c:3757 l2cap_chan_destroy net/bluetooth/l2cap_core.c:484 [inline] kref_put include/linux/kref.h:65 [inline] l2cap_chan_put+0x170/0x190 net/bluetooth/l2cap_core.c:498 do_enable_set+0x66c/0x900 net/bluetooth/6lowpan.c:1075 process_one_work+0x7f5/0x10f0 kernel/workqueue.c:2264 worker_thread+0xbbc/0x1630 kernel/workqueue.c:2410 kthread+0x332/0x350 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 The buggy address belongs to the object at ffff888096950000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 0 bytes inside of 2048-byte region [ffff888096950000, ffff888096950800) The buggy address belongs to the page: page:ffffea00025a5400 refcount:1 mapcount:0 mapping:ffff8880aa400e00 index:0x0 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea00027d1548 ffffea0002397808 ffff8880aa400e00 raw: 0000000000000000 ffff888096950000 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88809694ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88809694ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff888096950000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888096950080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888096950100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: syzbot+96414aa0033c363d8458@syzkaller.appspotmail.com Signed-off-by: Lihong Kou Signed-off-by: Marcel Holtmann --- net/bluetooth/6lowpan.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index bb55d92691b0..cff4944d5b66 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -50,6 +50,7 @@ static bool enable_6lowpan; /* We are listening incoming connections via this channel */ static struct l2cap_chan *listen_chan; +static DEFINE_MUTEX(set_lock); struct lowpan_peer { struct list_head list; @@ -1078,12 +1079,14 @@ static void do_enable_set(struct work_struct *work) enable_6lowpan = set_enable->flag; + mutex_lock(&set_lock); if (listen_chan) { l2cap_chan_close(listen_chan, 0); l2cap_chan_put(listen_chan); } listen_chan = bt_6lowpan_listen(); + mutex_unlock(&set_lock); kfree(set_enable); } @@ -1135,11 +1138,13 @@ static ssize_t lowpan_control_write(struct file *fp, if (ret == -EINVAL) return ret; + mutex_lock(&set_lock); if (listen_chan) { l2cap_chan_close(listen_chan, 0); l2cap_chan_put(listen_chan); listen_chan = NULL; } + mutex_unlock(&set_lock); if (conn) { struct lowpan_peer *peer; -- cgit From 55cced4f813bece01f96256d96f283b9210d19ee Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 23 Jun 2020 09:42:32 -0700 Subject: ipv6: fib6: avoid indirect calls from fib6_rule_lookup It was reported that a considerable amount of cycles were spent on the expensive indirect calls on fib6_rule_lookup. This patch introduces an inline helper called pol_route_func that uses the indirect_call_wrappers to avoid the indirect calls. This patch saves around 50ns per call. Performance was measured on the receiver by checking the amount of syncookies that server was able to generate under a synflood load. Traffic was generated using trafgen[1] which was pushing around 1Mpps on a single queue. Receiver was using only one rx queue which help to create a bottle neck and make the experiment rx-bounded. These are the syncookies generated over 10s from the different runs: Whithout the patch: TcpExtSyncookiesSent 3553749 0.0 TcpExtSyncookiesSent 3550895 0.0 TcpExtSyncookiesSent 3553845 0.0 TcpExtSyncookiesSent 3541050 0.0 TcpExtSyncookiesSent 3539921 0.0 TcpExtSyncookiesSent 3557659 0.0 TcpExtSyncookiesSent 3526812 0.0 TcpExtSyncookiesSent 3536121 0.0 TcpExtSyncookiesSent 3529963 0.0 TcpExtSyncookiesSent 3536319 0.0 With the patch: TcpExtSyncookiesSent 3611786 0.0 TcpExtSyncookiesSent 3596682 0.0 TcpExtSyncookiesSent 3606878 0.0 TcpExtSyncookiesSent 3599564 0.0 TcpExtSyncookiesSent 3601304 0.0 TcpExtSyncookiesSent 3609249 0.0 TcpExtSyncookiesSent 3617437 0.0 TcpExtSyncookiesSent 3608765 0.0 TcpExtSyncookiesSent 3620205 0.0 TcpExtSyncookiesSent 3601895 0.0 Without the patch the average is 354263 pkt/s or 2822 ns/pkt and with the patch the average is 360738 pkt/s or 2772 ns/pkt which gives an estimate of 50 ns per packet. [1] http://netsniff-ng.org/ Changelog since v1: - Change ordering in the ICW (Paolo Abeni) Cc: Luigi Rizzo Cc: Paolo Abeni Reported-by: Eric Dumazet Signed-off-by: Brian Vazquez Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 9 ++++++--- net/ipv6/ip6_fib.c | 3 ++- net/ipv6/route.c | 8 ++++---- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index fafe556d21e0..6053ef851555 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -111,11 +111,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, } else { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags); + rt = pol_lookup_func(lookup, + net, net->ipv6.fib6_local_tbl, fl6, skb, flags); if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put_flags(rt, flags); - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); + rt = pol_lookup_func(lookup, + net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put_flags(rt, flags); @@ -226,7 +228,8 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } - rt = lookup(net, table, flp6, arg->lookup_data, flags); + rt = pol_lookup_func(lookup, + net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { err = fib6_rule_saddr(net, rule, flags, flp6, ip6_dst_idev(&rt->dst)->dev); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 49ee89bbcba0..25a90f3f705c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -314,7 +314,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); + rt = pol_lookup_func(lookup, + net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error == -EAGAIN) { ip6_rt_put_flags(rt, flags); rt = net->ipv6.ip6_null_entry; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 82cbb46a2a4f..5852039ca9cf 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1207,7 +1207,7 @@ fallback: return nrt; } -static struct rt6_info *ip6_pol_route_lookup(struct net *net, +INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, @@ -2274,7 +2274,7 @@ out: } EXPORT_SYMBOL_GPL(ip6_pol_route); -static struct rt6_info *ip6_pol_route_input(struct net *net, +INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, @@ -2465,7 +2465,7 @@ void ip6_route_input(struct sk_buff *skb) &fl6, skb, flags)); } -static struct rt6_info *ip6_pol_route_output(struct net *net, +INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, @@ -2912,7 +2912,7 @@ struct ip6rd_flowi { struct in6_addr gateway; }; -static struct rt6_info *__ip6_route_redirect(struct net *net, +INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, -- cgit From bdfd2d1fa79acd03e18d1683419572f3682b39fd Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 23 Jun 2020 16:40:01 -0400 Subject: bonding/xfrm: use real_dev instead of slave_dev Rather than requiring every hw crypto capable NIC driver to do a check for slave_dev being set, set real_dev in the xfrm layer and xso init time, and then override it in the bonding driver as needed. Then NIC drivers can always use real_dev, and at the same time, we eliminate the use of a variable name that probably shouldn't have been used in the first place, particularly given recent current events. CC: Boris Pismenny CC: Saeed Mahameed CC: Leon Romanovsky CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: "David S. Miller" CC: Jeff Kirsher CC: Jakub Kicinski CC: Steffen Klassert CC: Herbert Xu CC: netdev@vger.kernel.org Suggested-by: Saeed Mahameed Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- net/xfrm/xfrm_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index b8918fc5248b..7b64bb64c822 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -120,8 +120,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) return skb; - /* This skb was already validated on the master dev */ - if ((x->xso.dev != dev) && (x->xso.slave_dev == dev)) + /* This skb was already validated on the upper/virtual dev */ + if ((x->xso.dev != dev) && (x->xso.real_dev == dev)) return skb; local_irq_save(flags); @@ -259,6 +259,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } xso->dev = dev; + xso->real_dev = dev; xso->num_exthdrs = 1; xso->flags = xuo->flags; -- cgit From b03d2142bea8cf7407a0a668ce8f5f115bd226c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:12 -0700 Subject: tcp: move ipv6_specific declaration to remove a warning ipv6_specific should be declared in tcp include files, not mptcp. This removes the following warning : CHECK net/ipv6/tcp_ipv6.c net/ipv6/tcp_ipv6.c:78:42: warning: symbol 'ipv6_specific' was not declared. Should it be static? Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index db56535dfc29..d4294b6d23e4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -355,9 +355,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, } extern const struct inet_connection_sock_af_ops ipv4_specific; -#if IS_ENABLED(CONFIG_MPTCP_IPV6) -extern const struct inet_connection_sock_af_ops ipv6_specific; -#endif void mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) -- cgit From 9b9e2f250e3e6f59ad07e6d03838c27a100e0042 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:13 -0700 Subject: tcp: move ipv4_specific to tcp include file Declare ipv4_specific once, in tcp.h were it belongs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d4294b6d23e4..06661781c9af 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -354,8 +354,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } -extern const struct inet_connection_sock_af_ops ipv4_specific; - void mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int mptcp_proto_v6_init(void); -- cgit From 5521d95e076238f1615cf1cdb135f318ef798b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:14 -0700 Subject: net: move tcp gro declarations to net/tcp.h This patch removes following (C=1 W=1) warnings for CONFIG_RETPOLINE=y : net/ipv4/tcp_offload.c:306:16: warning: symbol 'tcp4_gro_receive' was not declared. Should it be static? net/ipv4/tcp_offload.c:306:17: warning: no previous prototype for 'tcp4_gro_receive' [-Wmissing-prototypes] net/ipv4/tcp_offload.c:319:29: warning: symbol 'tcp4_gro_complete' was not declared. Should it be static? net/ipv4/tcp_offload.c:319:29: warning: no previous prototype for 'tcp4_gro_complete' [-Wmissing-prototypes] CHECK net/ipv6/tcpv6_offload.c net/ipv6/tcpv6_offload.c:16:16: warning: symbol 'tcp6_gro_receive' was not declared. Should it be static? net/ipv6/tcpv6_offload.c:29:29: warning: symbol 'tcp6_gro_complete' was not declared. Should it be static? CC net/ipv6/tcpv6_offload.o net/ipv6/tcpv6_offload.c:16:17: warning: no previous prototype for 'tcp6_gro_receive' [-Wmissing-prototypes] 16 | struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv6/tcpv6_offload.c:29:29: warning: no previous prototype for 'tcp6_gro_complete' [-Wmissing-prototypes] 29 | INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) | ^~~~~~~~~~~~~~~~~ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 --- net/ipv6/ip6_offload.c | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 02aa5cb3a4fd..d8dbff1dd1fa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1432,8 +1432,6 @@ static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, return inet_gso_segment(skb, features); } -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *, - struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, struct sk_buff *)); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) @@ -1608,7 +1606,6 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } -INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); int inet_gro_complete(struct sk_buff *skb, int nhoff) { diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 7fbb44736a34..78eec5b42385 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "ip6_offload.h" @@ -177,8 +178,6 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph, return len; } -INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *, - struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, @@ -319,7 +318,6 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, return inet_gro_receive(head, skb); } -INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { -- cgit From 6db693285cd109e566c553694002dea769612b24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Jun 2020 15:31:15 -0700 Subject: udp: move gro declarations to net/udp.h This removes following warnings : CC net/ipv4/udp_offload.o net/ipv4/udp_offload.c:504:17: warning: no previous prototype for 'udp4_gro_receive' [-Wmissing-prototypes] 504 | struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv4/udp_offload.c:584:29: warning: no previous prototype for 'udp4_gro_complete' [-Wmissing-prototypes] 584 | INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) | ^~~~~~~~~~~~~~~~~ CHECK net/ipv6/udp_offload.c net/ipv6/udp_offload.c:115:16: warning: symbol 'udp6_gro_receive' was not declared. Should it be static? net/ipv6/udp_offload.c:148:29: warning: symbol 'udp6_gro_complete' was not declared. Should it be static? CC net/ipv6/udp_offload.o net/ipv6/udp_offload.c:115:17: warning: no previous prototype for 'udp6_gro_receive' [-Wmissing-prototypes] 115 | struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) | ^~~~~~~~~~~~~~~~ net/ipv6/udp_offload.c:148:29: warning: no previous prototype for 'udp6_gro_complete' [-Wmissing-prototypes] 148 | INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) | ^~~~~~~~~~~~~~~~~ Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 --- net/ipv6/ip6_offload.c | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d8dbff1dd1fa..ea6ed6d487ed 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1432,8 +1432,6 @@ static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, return inet_gso_segment(skb, features); } -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, - struct sk_buff *)); struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb) { const struct net_offload *ops; @@ -1606,7 +1604,6 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) return -EINVAL; } -INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 78eec5b42385..a80f90bf3ae7 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "ip6_offload.h" @@ -178,8 +179,6 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph, return len; } -INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, - struct sk_buff *)); INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, struct sk_buff *skb) { @@ -318,7 +317,6 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head, return inet_gro_receive(head, skb); } -INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; -- cgit From 6f3934576853a4fa60dea74ac8822f0f016ef9e8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Jun 2020 18:07:41 -0500 Subject: net: ipv6: Use struct_size() helper and kcalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. Also, remove unnecessary function ipv6_rpl_srh_alloc_size() and replace kzalloc() with kcalloc(), which has a 2-factor argument form for multiplication. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 2 +- net/ipv6/rpl_iptunnel.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5a8bbcdcaf2b..e9b366994475 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -580,7 +580,7 @@ looped_back: hdr->segments_left--; i = n - hdr->segments_left; - buf = kzalloc(ipv6_rpl_srh_alloc_size(n + 1) * 2, GFP_ATOMIC); + buf = kcalloc(struct_size(hdr, segments.addr, n + 2), 2, GFP_ATOMIC); if (unlikely(!buf)) { kfree_skb(skb); return -1; diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index c3ececd7cfc1..5fdf3ebb953f 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -136,8 +136,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, oldhdr = ipv6_hdr(skb); - buf = kzalloc(ipv6_rpl_srh_alloc_size(srh->segments_left - 1) * 2, - GFP_ATOMIC); + buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC); if (!buf) return -ENOMEM; -- cgit From 0cc55e694e851921667dc80972a86feb1ca331ef Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Mon, 22 Jun 2020 22:50:39 -0400 Subject: dcb_doit: remove redundant skb check skb cannot be NULL here since its already being accessed before: sock_net(skb->sk). Remove the redundant null check. Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d2a4553bcf39..84dde5a2066e 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1736,7 +1736,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 portid = skb ? NETLINK_CB(skb).portid : 0; + u32 portid = NETLINK_CB(skb).portid; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; -- cgit From f9215d6bb53ae1bd2268369d77b3f0855a27ba47 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Mon, 22 Jun 2020 23:41:19 -0400 Subject: dn_route_rcv: remove redundant dev null check dev cannot be NULL here since its already being accessed before. Remove the redundant null check. Signed-off-by: Gaurav Singh Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 06b9983325cc..9eb7e4b62d9b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -670,7 +670,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type if (decnet_debug_level & 1) printk(KERN_DEBUG "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n", - (int)flags, (dev) ? dev->name : "???", len, skb->len, + (int)flags, dev->name, len, skb->len, padlen); if (flags & DN_RT_PKT_CNTL) { -- cgit From 428d2459cceb77357b81c242ca22462a6a904817 Mon Sep 17 00:00:00 2001 From: Petr Vaněk Date: Sat, 30 May 2020 14:39:12 +0200 Subject: xfrm: introduce oseq-may-wrap flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 4303 in section 3.3.3 suggests to disable anti-replay for manually distributed ICVs in which case the sender does not need to monitor or reset the counter. However, the sender still increments the counter and when it reaches the maximum value, the counter rolls over back to zero. This patch introduces new extra_flag XFRM_SA_XFLAG_OSEQ_MAY_WRAP which allows sequence number to cycle in outbound packets if set. This flag is used only in legacy and bmp code, because esn should not be negotiated if anti-replay is disabled (see note in 3.3.3 section). Signed-off-by: Petr Vaněk Acked-by: Christophe Gouault Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 98943f8d01aa..c6a4338a0d08 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -89,7 +89,8 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; - if (unlikely(x->replay.oseq == 0)) { + if (unlikely(x->replay.oseq == 0) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -168,7 +169,8 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; XFRM_SKB_CB(skb)->seq.output.hi = 0; - if (unlikely(replay_esn->oseq == 0)) { + if (unlikely(replay_esn->oseq == 0) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { replay_esn->oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -572,7 +574,8 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; - if (unlikely(oseq < x->replay.oseq)) { + if (unlikely(oseq < x->replay.oseq) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -611,7 +614,8 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff XFRM_SKB_CB(skb)->seq.output.hi = 0; xo->seq.hi = 0; - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(oseq < replay_esn->oseq) && + !(x->props.extra_flags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP)) { xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; -- cgit From dfde1d7dee9bfd095a4f16c9e0579a10f4092e81 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:50 +0300 Subject: sock: Move sock_valbool_flag to header This is preparation for usage in bpf_setsockopt. Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-1-zeil@yandex-team.ru --- net/core/sock.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6c4acf1f0220..5ba4753bc04d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -695,15 +695,6 @@ out: return ret; } -static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit, - int valbool) -{ - if (valbool) - sock_set_flag(sk, bit); - else - sock_reset_flag(sk, bit); -} - bool sk_mc_loop(struct sock *sk) { if (dev_recursion_level()) -- cgit From aad4a0a9513af962137c4842463d11ed491eec37 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:51 +0300 Subject: tcp: Expose tcp_sock_set_keepidle_locked This is preparation for usage in bpf_setsockopt. v2: - remove redundant EXPORT_SYMBOL (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200620153052.9439-2-zeil@yandex-team.ru --- net/ipv4/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 810cc164f795..de36c91d32ea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2957,7 +2957,7 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val) } EXPORT_SYMBOL(tcp_sock_set_user_timeout); -static int __tcp_sock_set_keepidle(struct sock *sk, int val) +int tcp_sock_set_keepidle_locked(struct sock *sk, int val) { struct tcp_sock *tp = tcp_sk(sk); @@ -2984,7 +2984,7 @@ int tcp_sock_set_keepidle(struct sock *sk, int val) int err; lock_sock(sk); - err = __tcp_sock_set_keepidle(sk, val); + err = tcp_sock_set_keepidle_locked(sk, val); release_sock(sk); return err; } @@ -3183,7 +3183,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_KEEPIDLE: - err = __tcp_sock_set_keepidle(sk, val); + err = tcp_sock_set_keepidle_locked(sk, val); break; case TCP_KEEPINTVL: if (val < 1 || val > MAX_TCP_KEEPINTVL) -- cgit From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:52 +0300 Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt This patch adds support of SO_KEEPALIVE flag and TCP related options to bpf_setsockopt() routine. This is helpful if we want to enable or tune TCP keepalive for applications which don't do it in the userspace code. v3: - update kernel-doc in uapi (Nikita Vetoshkin ) v4: - update kernel-doc in tools too (Alexei Starovoitov) - add test to selftests (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru --- net/core/filter.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 73395384afe2..c713b6b8938f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4289,10 +4289,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen, u32 flags) { char devname[IFNAMSIZ]; + int val, valbool; struct net *net; int ifindex; int ret = 0; - int val; if (!sk_fullsock(sk)) return -EINVAL; @@ -4303,6 +4303,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, if (optlen != sizeof(int) && optname != SO_BINDTODEVICE) return -EINVAL; val = *((int *)optval); + valbool = val ? 1 : 0; /* Only some socketops are supported */ switch (optname) { @@ -4361,6 +4362,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, } ret = sock_bindtoindex(sk, ifindex, false); break; + case SO_KEEPALIVE: + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, valbool); + sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); + break; default: ret = -EINVAL; } @@ -4421,6 +4427,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ret = tcp_set_congestion_control(sk, name, false, reinit, true); } else { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); if (optlen != sizeof(int)) @@ -4449,6 +4456,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, else tp->save_syn = val; break; + case TCP_KEEPIDLE: + ret = tcp_sock_set_keepidle_locked(sk, val); + break; + case TCP_KEEPINTVL: + if (val < 1 || val > MAX_TCP_KEEPINTVL) + ret = -EINVAL; + else + tp->keepalive_intvl = val * HZ; + break; + case TCP_KEEPCNT: + if (val < 1 || val > MAX_TCP_KEEPCNT) + ret = -EINVAL; + else + tp->keepalive_probes = val; + break; + case TCP_SYNCNT: + if (val < 1 || val > MAX_TCP_SYNCNT) + ret = -EINVAL; + else + icsk->icsk_syn_retries = val; + break; + case TCP_USER_TIMEOUT: + if (val < 0) + ret = -EINVAL; + else + icsk->icsk_user_timeout = val; + break; default: ret = -EINVAL; } -- cgit From 3a0377d993d7c62cbff623bce13eac077490f560 Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Wed, 24 Jun 2020 11:34:19 -0700 Subject: Bluetooth: Don't restart scanning if paused When restarting LE scanning, check if it's currently paused before enabling passive scanning. Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 86ae4b953a01..116207009dde 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -819,6 +819,11 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, { struct hci_dev *hdev = req->hdev; + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return; + } + /* Use ext scanning if set ext scan param and ext scan enable is * supported */ @@ -2657,6 +2662,11 @@ static int le_scan_restart(struct hci_request *req, unsigned long opt) if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return 0; + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return 0; + } + hci_req_add_le_scan_disable(req); if (use_ext_scan(hdev)) { -- cgit From 0592ff88347b5e13e31711a20a21c2ef2397f80b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:15 +0300 Subject: net: bridge: fdb_add_entry takes ndm as argument We can just pass ndm as an argument instead of its fields separately. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4877a0db16c6..ed80d9ab0fb9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -793,11 +793,11 @@ errout: /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, - const u8 *addr, u16 state, u16 flags, u16 vid, - u8 ndm_flags) + const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid) { - bool is_sticky = !!(ndm_flags & NTF_STICKY); + bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY); struct net_bridge_fdb_entry *fdb; + u16 state = ndm->ndm_state; bool modified = false; /* If the port cannot learn allow only local and static entries */ @@ -893,8 +893,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, err = br_fdb_external_learn_add(br, p, addr, vid, true); } else { spin_lock_bh(&br->hash_lock); - err = fdb_add_entry(br, p, addr, ndm->ndm_state, - nlh_flags, vid, ndm->ndm_flags); + err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid); spin_unlock_bh(&br->hash_lock); } -- cgit From 899426b3bdd947541ba4af8c767575889c8b842a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:16 +0300 Subject: net: neighbor: add fdb extended attribute Add an attribute to NDA which will contain all future fdb-specific attributes in order to avoid polluting the NDA namespace with e.g. bridge or vxlan specific attributes. The attribute is called NDA_FDB_EXT_ATTRS and the structure would look like: [NDA_FDB_EXT_ATTRS] = { [NFEA_xxx] } Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ef6b5a8f629c..8e39e28b0a8d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1783,6 +1783,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, [NDA_NH_ID] = { .type = NLA_U32 }, + [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit From 31cbc39b6344916c20452e43a9171009214c409c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:17 +0300 Subject: net: bridge: add option to allow activity notifications for any fdb entries This patch adds the ability to notify about activity of any entries (static, permanent or ext_learn). EVPN multihoming peers need it to properly and efficiently handle mac sync (peer active/locally active). We add a new NFEA_ACTIVITY_NOTIFY attribute which is used to dump the current activity state and to control if static entries should be monitored at all. We use 2 bits - one to activate fdb entry tracking (disabled by default) and the second to denote that an entry is inactive. We need the second bit in order to avoid multiple notifications of inactivity. Obviously this makes no difference for dynamic entries since at the time of inactivity they get deleted, while the tracked non-dynamic entries get the inactive bit set and get a notification. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 117 ++++++++++++++++++++++++++++++++++++++++++------ net/bridge/br_private.h | 4 ++ 2 files changed, 108 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ed80d9ab0fb9..642deb57c064 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -349,12 +349,21 @@ void br_fdb_cleanup(struct work_struct *work) */ rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { - unsigned long this_timer; + unsigned long this_timer = f->updated + delay; if (test_bit(BR_FDB_STATIC, &f->flags) || - test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) + test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) { + if (test_bit(BR_FDB_NOTIFY, &f->flags)) { + if (time_after(this_timer, now)) + work_delay = min(work_delay, + this_timer - now); + else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, + &f->flags)) + fdb_notify(br, f, RTM_NEWNEIGH, false); + } continue; - this_timer = f->updated + delay; + } + if (time_after(this_timer, now)) { work_delay = min(work_delay, this_timer - now); } else { @@ -556,11 +565,17 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return ret; } +/* returns true if the fdb was modified */ +static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb) +{ + return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) && + test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)); +} + void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid, unsigned long flags) { struct net_bridge_fdb_entry *fdb; - bool fdb_modified = false; /* some users want to always flood. */ if (hold_time(br) == 0) @@ -575,6 +590,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, source->dev->name, addr, vid); } else { unsigned long now = jiffies; + bool fdb_modified = false; + + if (now != fdb->updated) { + fdb->updated = now; + fdb_modified = __fdb_mark_active(fdb); + } /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst && @@ -587,8 +608,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, clear_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags); } - if (now != fdb->updated) - fdb->updated = now; + if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); if (unlikely(fdb_modified)) { @@ -667,6 +687,23 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, &fdb->key.vlan_id)) goto nla_put_failure; + if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) { + struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS); + u8 notify_bits = FDB_NOTIFY_BIT; + + if (!nest) + goto nla_put_failure; + if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) + notify_bits |= FDB_NOTIFY_INACTIVE_BIT; + + if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) { + nla_nest_cancel(skb, nest); + goto nla_put_failure; + } + + nla_nest_end(skb, nest); + } + nlmsg_end(skb, nlh); return 0; @@ -681,7 +718,9 @@ static inline size_t fdb_nlmsg_size(void) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + nla_total_size(sizeof(u16)) /* NDA_VLAN */ - + nla_total_size(sizeof(struct nda_cacheinfo)); + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */ + + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */ } static void fdb_notify(struct net_bridge *br, @@ -791,14 +830,40 @@ errout: return err; } +/* returns true if the fdb is modified */ +static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify) +{ + bool modified = false; + + /* allow to mark an entry as inactive, usually done on creation */ + if ((notify & FDB_NOTIFY_INACTIVE_BIT) && + !test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) + modified = true; + + if ((notify & FDB_NOTIFY_BIT) && + !test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) { + /* enabled activity tracking */ + modified = true; + } else if (!(notify & FDB_NOTIFY_BIT) && + test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) { + /* disabled activity tracking, clear notify state */ + clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags); + modified = true; + } + + return modified; +} + /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, - const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid) + const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid, + struct nlattr *nfea_tb[]) { bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY); struct net_bridge_fdb_entry *fdb; u16 state = ndm->ndm_state; bool modified = false; + u8 notify = 0; /* If the port cannot learn allow only local and static entries */ if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) && @@ -815,6 +880,13 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, if (is_sticky && (state & NUD_PERMANENT)) return -EINVAL; + if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) { + notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]); + if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) || + (notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT) + return -EINVAL; + } + fdb = br_fdb_find(br, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) @@ -858,6 +930,9 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, modified = true; } + if (fdb_handle_notify(fdb, notify)) + modified = true; + set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); fdb->used = jiffies; @@ -871,7 +946,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) { int err = 0; @@ -893,19 +968,24 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, err = br_fdb_external_learn_add(br, p, addr, vid, true); } else { spin_lock_bh(&br->hash_lock); - err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid); + err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); spin_unlock_bh(&br->hash_lock); } return err; } +static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { + [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 }, +}; + /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, struct netlink_ext_ack *extack) { + struct nlattr *nfea_tb[NFEA_MAX + 1], *attr; struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge_vlan *v; @@ -938,6 +1018,16 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], vg = nbp_vlan_group(p); } + if (tb[NDA_FDB_EXT_ATTRS]) { + attr = tb[NDA_FDB_EXT_ATTRS]; + err = nla_parse_nested(nfea_tb, NFEA_MAX, attr, + br_nda_fdb_pol, extack); + if (err) + return err; + } else { + memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1)); + } + if (vid) { v = br_vlan_find(vg, vid); if (!v || !br_vlan_should_use(v)) { @@ -946,9 +1036,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); if (err || !vg || !vg->num_vlans) goto out; @@ -959,7 +1049,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, + nfea_tb); if (err) goto out; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7501be4eeba0..c0ae639e1b36 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -48,6 +48,8 @@ enum { /* Path to usermode spanning tree program */ #define BR_STP_PROG "/sbin/bridge-stp" +#define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT) + typedef struct bridge_id bridge_id; typedef struct mac_addr mac_addr; typedef __u16 port_id; @@ -184,6 +186,8 @@ enum { BR_FDB_ADDED_BY_USER, BR_FDB_ADDED_BY_EXT_LEARN, BR_FDB_OFFLOADED, + BR_FDB_NOTIFY, + BR_FDB_NOTIFY_INACTIVE }; struct net_bridge_fdb_key { -- cgit From b5f1d9ec283bd28a452cf61d7e5c2f2b1a9cccda Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 23 Jun 2020 23:47:18 +0300 Subject: net: bridge: add a flag to avoid refreshing fdb when changing/adding When we modify or create a new fdb entry sometimes we want to avoid refreshing its activity in order to track it properly. One example is when a mac is received from EVPN multi-homing peer by FRR, which doesn't want to change local activity accounting. It makes it static and sets a flag to track its activity. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 642deb57c064..9db504baa094 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -860,6 +860,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, struct nlattr *nfea_tb[]) { bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY); + bool refresh = !nfea_tb[NFEA_DONT_REFRESH]; struct net_bridge_fdb_entry *fdb; u16 state = ndm->ndm_state; bool modified = false; @@ -937,7 +938,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, fdb->used = jiffies; if (modified) { - fdb->updated = jiffies; + if (refresh) + fdb->updated = jiffies; fdb_notify(br, fdb, RTM_NEWNEIGH, true); } @@ -977,6 +979,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 }, + [NFEA_DONT_REFRESH] = { .type = NLA_FLAG }, }; /* Add new permanent fdb entry with RTM_NEWNEIGH */ -- cgit From b08d4d3b6c0460306e8a0608413b201705200d33 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:04 -0700 Subject: net: bpf: Add bpf_seq_afinfo in tcp_iter_state A new field bpf_seq_afinfo is added to tcp_iter_state to provide bpf tcp iterator afinfo. There are two reasons on why we did this. First, the current way to get afinfo from PDE_DATA does not work for bpf iterator as its seq_file inode does not conform to /proc/net/{tcp,tcp6} inode structures. More specifically, anonymous bpf iterator will use an anonymous inode which is shared in the system and we cannot change inode private data structure at all. Second, bpf iterator for tcp/tcp6 wants to traverse all tcp and tcp6 sockets in one pass and bpf program can control whether they want to skip one sk_family or not. Having a different afinfo with family AF_UNSPEC make it easier to understand in the code. This patch does not change /proc/net/{tcp,tcp6} behavior as the bpf_seq_afinfo will be NULL for these two proc files. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230804.3987829-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad6435ba6d72..9cb65ee4ec63 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); */ static void *listening_get_next(struct seq_file *seq, void *cur) { - struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; struct hlist_nulls_node *node; struct sock *sk = cur; + if (st->bpf_seq_afinfo) + afinfo = st->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; @@ -2235,7 +2240,8 @@ get_sk: sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == afinfo->family) + if (afinfo->family == AF_UNSPEC || + sk->sk_family == afinfo->family) return sk; } spin_unlock(&ilb->lock); @@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) */ static void *established_get_first(struct seq_file *seq) { - struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; + if (st->bpf_seq_afinfo) + afinfo = st->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + st->offset = 0; for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; @@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq) spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if (sk->sk_family != afinfo->family || + if ((afinfo->family != AF_UNSPEC && + sk->sk_family != afinfo->family) || !net_eq(sock_net(sk), net)) { continue; } @@ -2304,19 +2316,25 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { - struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct tcp_seq_afinfo *afinfo; struct sock *sk = cur; struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); + if (st->bpf_seq_afinfo) + afinfo = st->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + ++st->num; ++st->offset; sk = sk_nulls_next(sk); sk_nulls_for_each_from(sk, node) { - if (sk->sk_family == afinfo->family && + if ((afinfo->family == AF_UNSPEC || + sk->sk_family == afinfo->family) && net_eq(sock_net(sk), net)) return sk; } -- cgit From 52d87d5f6418ba1b8b449ed5eea1532664896851 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:05 -0700 Subject: net: bpf: Implement bpf iterator for tcp The bpf iterator for tcp is implemented. Both tcp4 and tcp6 sockets will be traversed. It is up to bpf program to filter for tcp4 or tcp6 only, or both families of sockets. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230805.3987959-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9cb65ee4ec63..ea0df9fd7618 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2613,6 +2613,74 @@ out: return 0; } +#ifdef CONFIG_BPF_SYSCALL +struct bpf_iter__tcp { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct sock_common *, sk_common); + uid_t uid __aligned(8); +}; + +static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, + struct sock_common *sk_common, uid_t uid) +{ + struct bpf_iter__tcp ctx; + + meta->seq_num--; /* skip SEQ_START_TOKEN */ + ctx.meta = meta; + ctx.sk_common = sk_common; + ctx.uid = uid; + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_prog *prog; + struct sock *sk = v; + uid_t uid; + + if (v == SEQ_START_TOKEN) + return 0; + + if (sk->sk_state == TCP_TIME_WAIT) { + uid = 0; + } else if (sk->sk_state == TCP_NEW_SYN_RECV) { + const struct request_sock *req = v; + + uid = from_kuid_munged(seq_user_ns(seq), + sock_i_uid(req->rsk_listener)); + } else { + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); + } + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, false); + return tcp_prog_seq_show(prog, &meta, v, uid); +} + +static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + if (!v) { + meta.seq = seq; + prog = bpf_iter_get_info(&meta, true); + if (prog) + (void)tcp_prog_seq_show(prog, &meta, v, 0); + } + + tcp_seq_stop(seq, v); +} + +static const struct seq_operations bpf_iter_tcp_seq_ops = { + .show = bpf_iter_tcp_seq_show, + .start = tcp_seq_start, + .next = tcp_seq_next, + .stop = bpf_iter_tcp_seq_stop, +}; +#endif + static const struct seq_operations tcp4_seq_ops = { .show = tcp4_seq_show, .start = tcp_seq_start, @@ -2844,8 +2912,63 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { .exit_batch = tcp_sk_exit_batch, }; +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, + struct sock_common *sk_common, uid_t uid) + +static int bpf_iter_init_tcp(void *priv_data) +{ + struct tcp_iter_state *st = priv_data; + struct tcp_seq_afinfo *afinfo; + int ret; + + afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); + if (!afinfo) + return -ENOMEM; + + afinfo->family = AF_UNSPEC; + st->bpf_seq_afinfo = afinfo; + ret = bpf_iter_init_seq_net(priv_data); + if (ret) + kfree(afinfo); + return ret; +} + +static void bpf_iter_fini_tcp(void *priv_data) +{ + struct tcp_iter_state *st = priv_data; + + kfree(st->bpf_seq_afinfo); + bpf_iter_fini_seq_net(priv_data); +} + +static const struct bpf_iter_reg tcp_reg_info = { + .target = "tcp", + .seq_ops = &bpf_iter_tcp_seq_ops, + .init_seq_private = bpf_iter_init_tcp, + .fini_seq_private = bpf_iter_fini_tcp, + .seq_priv_size = sizeof(struct tcp_iter_state), + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__tcp, sk_common), + PTR_TO_BTF_ID_OR_NULL }, + }, +}; + +static void __init bpf_iter_register(void) +{ + if (bpf_iter_reg_target(&tcp_reg_info)) + pr_warn("Warning: could not register bpf iterator tcp\n"); +} + +#endif + void __init tcp_v4_init(void) { if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); + +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) + bpf_iter_register(); +#endif } -- cgit From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:09 -0700 Subject: bpf: Add bpf_skc_to_tcp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a tcp6_sock pointer. The return value could be NULL if the casting is illegal. A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added so the verifier is able to deduce proper return types for the helper. Different from the previous BTF_ID based helpers, the bpf_skc_to_tcp6_sock() argument can be several possible btf_ids. More specifically, all possible socket data structures with sock_common appearing in the first in the memory layout. This patch only added socket types related to tcp and udp. All possible argument btf_id and return value btf_id for helper bpf_skc_to_tcp6_sock() are pre-calculcated and cached. In the future, it is even possible to precompute these btf_id's at kernel build time. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com --- net/core/filter.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c713b6b8938f..176e27d75c51 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -9225,3 +9226,84 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) { bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } + +/* Define a list of socket types which can be the argument for + * skc_to_*_sock() helpers. All these sockets should have + * sock_common as the first argument in its memory layout. + */ +#define BTF_SOCK_TYPE_xxx \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock") + +enum { +#define BTF_SOCK_TYPE(name, str) name, +BTF_SOCK_TYPE_xxx +#undef BTF_SOCK_TYPE +MAX_BTF_SOCK_TYPE, +}; + +static int btf_sock_ids[MAX_BTF_SOCK_TYPE]; + +#ifdef CONFIG_BPF_SYSCALL +static const char *bpf_sock_types[] = { +#define BTF_SOCK_TYPE(name, str) str, +BTF_SOCK_TYPE_xxx +#undef BTF_SOCK_TYPE +}; + +void init_btf_sock_ids(struct btf *btf) +{ + int i, btf_id; + + for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) { + btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i], + BTF_KIND_STRUCT); + if (btf_id > 0) + btf_sock_ids[i] = btf_id; + } +} +#endif + +static bool check_arg_btf_id(u32 btf_id, u32 arg) +{ + int i; + + /* only one argument, no need to check arg */ + for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) + if (btf_sock_ids[i] == btf_id) + return true; + return false; +} + +BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) +{ + /* tcp6_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. + */ + BTF_TYPE_EMIT(struct tcp6_sock); + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && + sk->sk_family == AF_INET6) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { + .func = bpf_skc_to_tcp6_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6], +}; -- cgit From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:11 -0700 Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers Three more helpers are added to cast a sock_common pointer to an tcp_sock, tcp_timewait_sock or a tcp_request_sock for tracing programs. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com --- net/core/filter.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 176e27d75c51..0b4e5aed7e20 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -74,6 +74,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -9307,3 +9308,64 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = { .check_btf_id = check_arg_btf_id, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6], }; + +BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk) +{ + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { + .func = bpf_skc_to_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP], +}; + +BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) +{ + if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) + return (unsigned long)sk; + +#if IS_BUILTIN(CONFIG_IPV6) + if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) + return (unsigned long)sk; +#endif + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { + .func = bpf_skc_to_tcp_timewait_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW], +}; + +BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) +{ + if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) + return (unsigned long)sk; + +#if IS_BUILTIN(CONFIG_IPV6) + if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) + return (unsigned long)sk; +#endif + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = { + .func = bpf_skc_to_tcp_request_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ], +}; -- cgit From 9e8ca27afab6c92477b459f6a5d2af0cd3197c20 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:12 -0700 Subject: net: bpf: Add bpf_seq_afinfo in udp_iter_state Similar to tcp_iter_state, a new field bpf_seq_afinfo is added to udp_iter_state to provide bpf udp iterator afinfo. This does not change /proc/net/{udp, udp6} behavior. But it enables bpf iterator to avoid get afinfo from PDE_DATA and iterate through all udp and udp6 sockets in one pass. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230812.3988347-1-yhs@fb.com --- net/ipv4/udp.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b7ebbcae497..90355301b266 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2826,10 +2826,15 @@ EXPORT_SYMBOL(udp_prot); static struct sock *udp_get_first(struct seq_file *seq, int start) { struct sock *sk; - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + if (state->bpf_seq_afinfo) + afinfo = state->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; ++state->bucket) { struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket]; @@ -2841,7 +2846,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) sk_for_each(sk, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; - if (sk->sk_family == afinfo->family) + if (afinfo->family == AF_UNSPEC || + sk->sk_family == afinfo->family) goto found; } spin_unlock_bh(&hslot->lock); @@ -2853,13 +2859,20 @@ found: static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + if (state->bpf_seq_afinfo) + afinfo = state->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + do { sk = sk_next(sk); - } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family)); + } while (sk && (!net_eq(sock_net(sk), net) || + (afinfo->family != AF_UNSPEC && + sk->sk_family != afinfo->family))); if (!sk) { if (state->bucket <= afinfo->udp_table->mask) @@ -2904,9 +2917,14 @@ EXPORT_SYMBOL(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { - struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file)); + struct udp_seq_afinfo *afinfo; struct udp_iter_state *state = seq->private; + if (state->bpf_seq_afinfo) + afinfo = state->bpf_seq_afinfo; + else + afinfo = PDE_DATA(file_inode(seq->file)); + if (state->bucket <= afinfo->udp_table->mask) spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); } -- cgit From 5788b3a07fc5863606c3b92fa7b1ffe125e6eb4c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:13 -0700 Subject: net: bpf: Implement bpf iterator for udp The bpf iterator for udp is implemented. Both udp4 and udp6 sockets will be traversed. It is up to bpf program to filter for udp4 or udp6 only, or both families of sockets. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230813.3988404-1-yhs@fb.com --- net/ipv4/udp.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 90355301b266..31530129f137 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2968,6 +2968,67 @@ int udp4_seq_show(struct seq_file *seq, void *v) return 0; } +#ifdef CONFIG_BPF_SYSCALL +struct bpf_iter__udp { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct udp_sock *, udp_sk); + uid_t uid __aligned(8); + int bucket __aligned(8); +}; + +static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, + struct udp_sock *udp_sk, uid_t uid, int bucket) +{ + struct bpf_iter__udp ctx; + + meta->seq_num--; /* skip SEQ_START_TOKEN */ + ctx.meta = meta; + ctx.udp_sk = udp_sk; + ctx.uid = uid; + ctx.bucket = bucket; + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v) +{ + struct udp_iter_state *state = seq->private; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + struct sock *sk = v; + uid_t uid; + + if (v == SEQ_START_TOKEN) + return 0; + + uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); + meta.seq = seq; + prog = bpf_iter_get_info(&meta, false); + return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); +} + +static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + if (!v) { + meta.seq = seq; + prog = bpf_iter_get_info(&meta, true); + if (prog) + (void)udp_prog_seq_show(prog, &meta, v, 0, 0); + } + + udp_seq_stop(seq, v); +} + +static const struct seq_operations bpf_iter_udp_seq_ops = { + .start = udp_seq_start, + .next = udp_seq_next, + .stop = bpf_iter_udp_seq_stop, + .show = bpf_iter_udp_seq_show, +}; +#endif + const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, @@ -3085,6 +3146,57 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { .init = udp_sysctl_init, }; +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, + struct udp_sock *udp_sk, uid_t uid, int bucket) + +static int bpf_iter_init_udp(void *priv_data) +{ + struct udp_iter_state *st = priv_data; + struct udp_seq_afinfo *afinfo; + int ret; + + afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); + if (!afinfo) + return -ENOMEM; + + afinfo->family = AF_UNSPEC; + afinfo->udp_table = &udp_table; + st->bpf_seq_afinfo = afinfo; + ret = bpf_iter_init_seq_net(priv_data); + if (ret) + kfree(afinfo); + return ret; +} + +static void bpf_iter_fini_udp(void *priv_data) +{ + struct udp_iter_state *st = priv_data; + + kfree(st->bpf_seq_afinfo); + bpf_iter_fini_seq_net(priv_data); +} + +static const struct bpf_iter_reg udp_reg_info = { + .target = "udp", + .seq_ops = &bpf_iter_udp_seq_ops, + .init_seq_private = bpf_iter_init_udp, + .fini_seq_private = bpf_iter_fini_udp, + .seq_priv_size = sizeof(struct udp_iter_state), + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__udp, udp_sk), + PTR_TO_BTF_ID_OR_NULL }, + }, +}; + +static void __init bpf_iter_register(void) +{ + if (bpf_iter_reg_target(&udp_reg_info)) + pr_warn("Warning: could not register bpf iterator udp\n"); +} +#endif + void __init udp_init(void) { unsigned long limit; @@ -3110,4 +3222,8 @@ void __init udp_init(void) if (register_pernet_subsys(&udp_sysctl_ops)) panic("UDP: failed to init sysctl parameters.\n"); + +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) + bpf_iter_register(); +#endif } -- cgit From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:15 -0700 Subject: bpf: Add bpf_skc_to_udp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a udp6_sock pointer. The return value could be NULL if the casting is illegal. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com --- net/core/filter.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 0b4e5aed7e20..c796e141ea8e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9369,3 +9369,25 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = { .check_btf_id = check_arg_btf_id, .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ], }; + +BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk) +{ + /* udp6_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. + */ + BTF_TYPE_EMIT(struct udp6_sock); + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP && + sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { + .func = bpf_skc_to_udp6_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID, + .check_btf_id = check_arg_btf_id, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], +}; -- cgit From 19e528dc9af29169fa7cdfa61071805fdef504c6 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Wed, 24 Jun 2020 17:36:28 +0800 Subject: net: qos: add tc police offloading action with max frame size limit Current police offloading support the 'burst'' and 'rate_bytes_ps'. Some hardware own the capability to limit the frame size. If the frame size larger than the setting, the frame would be dropped. For the police action itself already accept the 'mtu' parameter in tc command. But not extend to tc flower offloading. So extend 'mtu' to tc flower offloading. Signed-off-by: Po Liu Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a00a203b2ef5..6aba7d5ba1ec 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3658,6 +3658,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->police.burst = tcf_police_tcfp_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.mtu = tcf_police_tcfp_mtu(act); } else if (is_tcf_ct(act)) { entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); -- cgit From 627e39b1399e72e53895eec6bbec30199ed43de2 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Wed, 24 Jun 2020 17:36:30 +0800 Subject: net: qos: police action add index for tc flower offloading Hardware device may include more than one police entry. Specifying the action's index make it possible for several tc filters to share the same police action when installing the filters. Propagate this index to device drivers through the flow offload intermediate representation, so that drivers could share a single hardware policer between multiple filters. v1->v2 changes: - Update the commit message suggest by Ido Schimmel Signed-off-by: Po Liu Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6aba7d5ba1ec..fdc4c89ca1fa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3659,6 +3659,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); + entry->police.index = act->tcfa_index; } else if (is_tcf_ct(act)) { entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); -- cgit From b8392808eb3fc28e523e28cb258c81ca246deb9b Mon Sep 17 00:00:00 2001 From: Kevin Darbyshire-Bryant Date: Thu, 25 Jun 2020 22:18:00 +0200 Subject: sch_cake: add RFC 8622 LE PHB support to CAKE diffserv handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change tin mapping on diffserv3, 4 & 8 for LE PHB support, in essence making LE a member of the Bulk tin. Bulk has the least priority and minimum of 1/16th total bandwidth in the face of higher priority traffic. NB: Diffserv 3 & 4 swap tin 0 & 1 priorities from the default order as found in diffserv8, in case anyone is wondering why it looks a bit odd. Signed-off-by: Kevin Darbyshire-Bryant [ reword commit message slightly ] Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 60f8ae578819..cae367515804 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -312,8 +312,8 @@ static const u8 precedence[] = { }; static const u8 diffserv8[] = { - 2, 5, 1, 2, 4, 2, 2, 2, - 0, 2, 1, 2, 1, 2, 1, 2, + 2, 0, 1, 2, 4, 2, 2, 2, + 1, 2, 1, 2, 1, 2, 1, 2, 5, 2, 4, 2, 4, 2, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 6, 2, 3, 2, 3, 2, 3, 2, @@ -323,7 +323,7 @@ static const u8 diffserv8[] = { }; static const u8 diffserv4[] = { - 0, 2, 0, 0, 2, 0, 0, 0, + 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, @@ -334,7 +334,7 @@ static const u8 diffserv4[] = { }; static const u8 diffserv3[] = { - 0, 0, 0, 0, 2, 0, 0, 0, + 0, 1, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -- cgit From d528510e6dee7a1954560cb1f23438b60236fada Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Fri, 26 Jun 2020 10:35:06 +0200 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 61d8dbe8c954..42b8d1e76dea 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2020.2" +#define BATADV_SOURCE_VERSION "2020.3" #endif /* B.A.T.M.A.N. parameters */ -- cgit From bccb48c89fe3c09f1cbb7c8612e31f5daa1d4541 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 1 Jun 2020 20:13:21 +0200 Subject: batman-adv: Fix typos and grammar in documentation Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 8 ++++---- net/batman-adv/bat_v_elp.c | 10 +++++----- net/batman-adv/bat_v_ogm.c | 14 +++++++------- net/batman-adv/bridge_loop_avoidance.c | 6 +++--- net/batman-adv/distributed-arp-table.c | 2 +- net/batman-adv/fragmentation.c | 6 +++--- net/batman-adv/hard-interface.c | 14 +++++++------- net/batman-adv/log.h | 6 +++--- net/batman-adv/main.c | 2 +- net/batman-adv/main.h | 6 +++--- net/batman-adv/multicast.c | 21 +++++++++++---------- net/batman-adv/netlink.c | 2 +- net/batman-adv/network-coding.c | 14 +++++++------- net/batman-adv/originator.c | 8 ++++---- net/batman-adv/routing.c | 4 ++-- net/batman-adv/send.c | 4 ++-- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/tp_meter.c | 12 ++++++------ net/batman-adv/translation-table.c | 10 +++++----- net/batman-adv/tvlv.c | 4 ++-- net/batman-adv/types.h | 12 ++++++------ 21 files changed, 84 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index e87f19c82e8d..5b3a41983156 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -134,7 +134,7 @@ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) * * Return: the originator object corresponding to the passed mac address or NULL * on failure. - * If the object does not exists it is created an initialised. + * If the object does not exist, it is created and initialised. */ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) @@ -871,7 +871,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) } /** - * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface + * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface * @orig_node: originator which reproadcasted the OGMs directly * @if_outgoing: interface which transmitted the original OGM and received the * direct rebroadcast @@ -1554,7 +1554,7 @@ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet, * batadv_iv_ogm_process() - process an incoming batman iv OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) - * @if_incoming: the interface where this packet was receved + * @if_incoming: the interface where this packet was received */ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) @@ -2288,7 +2288,7 @@ batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information - * @single_hardif: Limit dump to this hard interfaace + * @single_hardif: Limit dump to this hard interface */ static void batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 0bdefa35da98..d35aca0e969a 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -60,7 +60,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) * @neigh: the neighbour for which the throughput has to be obtained * * Return: The throughput towards the given neighbour in multiples of 100kpbs - * (a value of '1' equals to 0.1Mbps, '10' equals 1Mbps, etc). + * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). */ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) { @@ -183,8 +183,8 @@ void batadv_v_elp_throughput_metric_update(struct work_struct *work) * * Sends a predefined number of unicast wifi packets to a given neighbour in * order to trigger the throughput estimation on this link by the RC algorithm. - * Packets are sent only if there there is not enough payload unicast traffic - * towards this neighbour.. + * Packets are sent only if there is not enough payload unicast traffic towards + * this neighbour.. * * Return: True on success and false in case of error during skb preparation. */ @@ -244,7 +244,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) * batadv_v_elp_periodic_work() - ELP periodic task per interface * @work: work queue item * - * Emits broadcast ELP message in regular intervals. + * Emits broadcast ELP messages in regular intervals. */ static void batadv_v_elp_periodic_work(struct work_struct *work) { @@ -499,7 +499,7 @@ orig_free: * @skb: the received packet * @if_incoming: the interface this packet was received through * - * Return: NET_RX_SUCCESS and consumes the skb if the packet was peoperly + * Return: NET_RX_SUCCESS and consumes the skb if the packet was properly * processed or NET_RX_DROP in case of failure. */ int batadv_v_elp_packet_recv(struct sk_buff *skb, diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 18028b9f95f0..0d404f7bcd9f 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -47,9 +47,9 @@ * @bat_priv: the bat priv with all the soft interface information * @addr: the address of the originator * - * Return: the orig_node corresponding to the specified address. If such object - * does not exist it is allocated here. In case of allocation failure returns - * NULL. + * Return: the orig_node corresponding to the specified address. If such an + * object does not exist, it is allocated here. In case of allocation failure + * returns NULL. */ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) @@ -172,7 +172,7 @@ static bool batadv_v_ogm_queue_left(struct sk_buff *skb, * batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue * @hard_iface: the interface holding the aggregation queue * - * Empties the OGMv2 aggregation queue and frees all the skbs it contained. + * Empties the OGMv2 aggregation queue and frees all the skbs it contains. * * Caller needs to hold the hard_iface->bat_v.aggr_list.lock. */ @@ -378,7 +378,7 @@ static void batadv_v_ogm_send(struct work_struct *work) * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface * @work: work queue item * - * Emits aggregated OGM message in regular intervals. + * Emits aggregated OGM messages in regular intervals. */ void batadv_v_ogm_aggr_work(struct work_struct *work) { @@ -399,7 +399,7 @@ void batadv_v_ogm_aggr_work(struct work_struct *work) * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare * - * Takes care of scheduling own OGM sending routine for this interface. + * Takes care of scheduling its own OGM sending routine for this interface. * * Return: 0 on success or a negative error code otherwise */ @@ -847,7 +847,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, * batadv_v_ogm_process() - process an incoming batman v OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) - * @if_incoming: the interface where this packet was receved + * @if_incoming: the interface where this packet was received */ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 41cc87f06b14..91a04ca373dc 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -992,7 +992,7 @@ static bool batadv_handle_claim(struct batadv_priv *bat_priv, * @hw_dst: the Hardware destination in the ARP Header * @ethhdr: pointer to the Ethernet header of the claim frame * - * checks if it is a claim packet and if its on the same group. + * checks if it is a claim packet and if it's on the same group. * This function also applies the group ID of the sender * if it is in the same mesh. * @@ -1757,7 +1757,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * @vid: the VLAN ID of the frame * * Checks if this packet is a loop detect frame which has been sent by us, - * throw an uevent and log the event if that is the case. + * throws an uevent and logs the event if that is the case. * * Return: true if it is a loop detect frame which is to be dropped, false * otherwise. @@ -1815,7 +1815,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, * * we have to race for a claim * * if the frame is allowed on the LAN * - * in these cases, the skb is further handled by this function + * In these cases, the skb is further handled by this function * * Return: true if handled, otherwise it returns false and the caller shall * further process the skb. diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b85da4b7a77b..0e6e53e9b5f3 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -666,7 +666,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst, * @vid: VLAN identifier * @packet_subtype: unicast4addr packet subtype to use * - * This function copies the skb with pskb_copy() and is sent as unicast packet + * This function copies the skb with pskb_copy() and is sent as a unicast packet * to each of the selected candidates. * * Return: true if the packet is sent to at least one candidate, false diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 7cad97644d05..9fdbe3068153 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -102,8 +102,8 @@ static int batadv_frag_size_limit(void) * * Caller must hold chain->lock. * - * Return: true if chain is empty and caller can just insert the new fragment - * without searching for the right position. + * Return: true if chain is empty and the caller can just insert the new + * fragment without searching for the right position. */ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, u16 seqno) @@ -306,7 +306,7 @@ free: * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb * to NULL; 3) Error: Return false and free skb. * - * Return: true when packet is merged or buffered, false when skb is not not + * Return: true when the packet is merged or buffered, false when skb is not not * used. */ bool batadv_frag_skb_buffer(struct sk_buff **skb, diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 3a256af92784..53c27c67cc11 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -138,10 +138,10 @@ static bool batadv_mutual_parents(const struct net_device *dev1, * @net_dev: the device to check * * If the user creates any virtual device on top of a batman-adv interface, it - * is important to prevent this new interface to be used to create a new mesh - * network (this behaviour would lead to a batman-over-batman configuration). - * This function recursively checks all the fathers of the device passed as - * argument looking for a batman-adv soft interface. + * is important to prevent this new interface from being used to create a new + * mesh network (this behaviour would lead to a batman-over-batman + * configuration). This function recursively checks all the fathers of the + * device passed as argument looking for a batman-adv soft interface. * * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise @@ -680,8 +680,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) * @slave: the interface enslaved in another master * @master: the master from which slave has to be removed * - * Invoke ndo_del_slave on master passing slave as argument. In this way slave - * is free'd and master can correctly change its internal state. + * Invoke ndo_del_slave on master passing slave as argument. In this way the + * slave is free'd and the master can correctly change its internal state. * * Return: 0 on success, a negative value representing the error otherwise */ @@ -818,7 +818,7 @@ err: * @soft_iface: soft interface to check * * This function is only using RCU for locking - the result can therefore be - * off when another functions is modifying the list at the same time. The + * off when another function is modifying the list at the same time. The * caller can use the rtnl_lock to make sure that the count is accurate. * * Return: number of connected/enslaved hard interfaces diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index f9884dc56cf3..979864c0fa6b 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -69,7 +69,7 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) __printf(2, 3); /** - * _batadv_dbg() - Store debug output with(out) ratelimiting + * _batadv_dbg() - Store debug output with(out) rate limiting * @type: type of debug message * @bat_priv: the bat priv with all the soft interface information * @ratelimited: whether output should be rate limited @@ -95,7 +95,7 @@ static inline void _batadv_dbg(int type __always_unused, #endif /** - * batadv_dbg() - Store debug output without ratelimiting + * batadv_dbg() - Store debug output without rate limiting * @type: type of debug message * @bat_priv: the bat priv with all the soft interface information * @arg: format string and variable arguments @@ -104,7 +104,7 @@ static inline void _batadv_dbg(int type __always_unused, _batadv_dbg(type, bat_priv, 0, ## arg) /** - * batadv_dbg_ratelimited() - Store debug output with ratelimiting + * batadv_dbg_ratelimited() - Store debug output with rate limiting * @type: type of debug message * @bat_priv: the bat priv with all the soft interface information * @arg: format string and variable arguments diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d8a255c85e77..519c08c2cfba 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -666,7 +666,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) * @vid: the VLAN identifier for which the AP isolation attributed as to be * looked up * - * Return: true if AP isolation is on for the VLAN idenfied by vid, false + * Return: true if AP isolation is on for the VLAN identified by vid, false * otherwise */ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 42b8d1e76dea..0393bb9ed3d0 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -308,7 +308,7 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, * @y: value to compare @x against * * It handles overflows/underflows and can correctly check for a predecessor - * unless the variable sequence number has grown by more then + * unless the variable sequence number has grown by more than * 2**(bitwidth(x)-1)-1. * * This means that for a u8 with the maximum value 255, it would think: @@ -330,11 +330,11 @@ static inline bool batadv_has_timed_out(unsigned long timestamp, /** * batadv_seq_after() - Checks if a sequence number x is a successor of y - * @x: potential sucessor of @y + * @x: potential successor of @y * @y: value to compare @x against * * It handles overflows/underflows and can correctly check for a successor - * unless the variable sequence number has grown by more then + * unless the variable sequence number has grown by more than * 2**(bitwidth(x)-1)-1. * * This means that for a u8 with the maximum value 255, it would think: diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 9ebdc1e864b9..bdc4a1fba1c6 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -510,7 +510,7 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, * the given mcast_list. In general, multicast listeners provided by * your multicast receiving applications run directly on this node. * - * If there is a bridge interface on top of dev, collects from that one + * If there is a bridge interface on top of dev, collect from that one * instead. Just like with IP addresses and routes, multicast listeners * will(/should) register to the bridge interface instead of an * enslaved bat0. @@ -832,8 +832,8 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @flags: TVLV flags indicating the new multicast state * - * Whenever the multicast TVLV flags this nodes announces change this notifies - * userspace via the 'mcast' log level. + * Whenever the multicast TVLV flags this node announces change, this function + * should be used to notify userspace about the change. */ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { @@ -1244,7 +1244,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv) * @ethhdr: an ethernet header to determine the protocol family from * * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or - * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and + * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, sets and * increases its refcount. */ static struct batadv_orig_node * @@ -1693,7 +1693,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, } /** - * batadv_mcast_forw_send() - send packet to any detected multicast recpient + * batadv_mcast_forw_send() - send packet to any detected multicast recipient * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier @@ -1742,7 +1742,8 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator, - * orig, has toggled then this method updates counter and list accordingly. + * orig, has toggled then this method updates the counter and the list + * accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ @@ -1787,7 +1788,7 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv, * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has - * toggled then this method updates counter and list accordingly. + * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ @@ -1832,7 +1833,7 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv, * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has - * toggled then this method updates counter and list accordingly. + * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ @@ -1877,7 +1878,7 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_NO_RTR4 flag of this originator, orig, has - * toggled then this method updates counter and list accordingly. + * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ @@ -1922,7 +1923,7 @@ static void batadv_mcast_want_rtr4_update(struct batadv_priv *bat_priv, * @mcast_flags: flags indicating the new multicast state * * If the BATADV_MCAST_WANT_NO_RTR6 flag of this originator, orig, has - * toggled then this method updates counter and list accordingly. + * toggled then this method updates the counter and the list accordingly. * * Caller needs to hold orig->mcast_handler_lock. */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 02ed073f95a9..cfb00dfa468a 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -640,7 +640,7 @@ batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) * @bat_priv: the bat priv with all the soft interface information * @dst: destination of tp_meter session * @result: reason for tp meter session stop - * @test_time: total time ot the tp_meter session + * @test_time: total time of the tp_meter session * @total_bytes: bytes acked to the receiver * @cookie: cookie of tp_meter session * diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index b0469d15da0e..48d707850f3e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -134,7 +134,7 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, } /** - * batadv_nc_mesh_init() - initialise coding hash table and start house keeping + * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure @@ -700,7 +700,7 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, } /** - * batadv_nc_worker() - periodic task for house keeping related to network + * batadv_nc_worker() - periodic task for housekeeping related to network * coding * @work: kernel work struct */ @@ -1316,7 +1316,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv, } /** - * batadv_nc_skb_src_search() - Loops through the list of neighoring nodes of + * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of * the skb's sender (may be equal to the originator). * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to forward @@ -1402,10 +1402,10 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, * @neigh_node: next hop to forward packet to * @ethhdr: pointer to the ethernet header inside the skb * - * Loops through list of neighboring nodes the next hop has a good connection to - * (receives OGMs with a sufficient quality). We need to find a neighbor of our - * next hop that potentially sent a packet which our next hop also received - * (overheard) and has stored for later decoding. + * Loops through the list of neighboring nodes the next hop has a good + * connection to (receives OGMs with a sufficient quality). We need to find a + * neighbor of our next hop that potentially sent a packet which our next hop + * also received (overheard) and has stored for later decoding. * * Return: true if the skb was consumed (encoded packet sent) or false otherwise */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5b0c2fffc214..805d8969bdfb 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -325,7 +325,7 @@ void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) * @if_outgoing: the interface where the payload packet has been received or * the OGM should be sent to * - * Return: the neighbor which should be router for this orig_node/iface. + * Return: the neighbor which should be the router for this orig_node/iface. * * The object is returned with refcounter increased by 1. */ @@ -515,7 +515,7 @@ out: * Looks for and possibly returns a neighbour belonging to this originator list * which is connected through the provided hard interface. * - * Return: neighbor when found. Othwerwise NULL + * Return: neighbor when found. Otherwise NULL */ static struct batadv_neigh_node * batadv_neigh_node_get(const struct batadv_orig_node *orig_node, @@ -620,7 +620,7 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, * * Looks for and possibly returns a neighbour belonging to this hard interface. * - * Return: neighbor when found. Othwerwise NULL + * Return: neighbor when found. Otherwise NULL */ struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, @@ -999,7 +999,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv) * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the originator * - * Creates a new originator object and initialise all the generic fields. + * Creates a new originator object and initialises all the generic fields. * The new object is not added to the originator list. * * Return: the newly created object or NULL on failure. diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index d343382e9664..27cdf5e4349a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -449,7 +449,7 @@ free_skb: * @skb: packet to check * @hdr_size: size of header to pull * - * Check for short header and bad addresses in given packet. + * Checks for short header and bad addresses in the given packet. * * Return: negative value when check fails and 0 otherwise. The negative value * depends on the reason: -ENODATA for bad header, -EBADR for broadcast @@ -1113,7 +1113,7 @@ free_skb: * @recv_if: interface that the skb is received on * * This function does one of the three following things: 1) Forward fragment, if - * the assembled packet will exceed our MTU; 2) Buffer fragment, if we till + * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still * lack further fragments; 3) Merge fragments, if we have all needed parts. * * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 7f8ade04e08e..d267b94800d6 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -605,8 +605,8 @@ bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet, * given hard_iface. If hard_iface is NULL forwarding packets on all hard * interfaces will be claimed. * - * The packets are being moved from the forw_list to the cleanup_list and - * by that allows already running threads to notice the claiming. + * The packets are being moved from the forw_list to the cleanup_list. This + * makes it possible for already running threads to notice the claim. */ static void batadv_forw_packet_list_steal(struct hlist_head *forw_list, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index f1f1c86f3419..23833a0ba5e6 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -406,7 +406,7 @@ end: * @hdr_size: size of already parsed batman-adv header * @orig_node: originator from which the batman-adv packet was sent * - * Sends a ethernet frame to the receive path of the local @soft_iface. + * Sends an ethernet frame to the receive path of the local @soft_iface. * skb->data has still point to the batman-adv header with the size @hdr_size. * The caller has to have parsed this header already and made sure that at least * @hdr_size bytes are still available for pull in @skb. diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index bd2ac570c42c..db7e3774825b 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -66,7 +66,7 @@ /** * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond - * such amound of milliseconds, the receiver is considered unreachable and the + * such amount of milliseconds, the receiver is considered unreachable and the * connection is killed */ #define BATADV_TP_MAX_RTO 30000 @@ -108,10 +108,10 @@ static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid) * batadv_tp_cwnd() - compute the new cwnd size * @base: base cwnd size value * @increment: the value to add to base to get the new size - * @min: minumim cwnd value (usually MSS) + * @min: minimum cwnd value (usually MSS) * - * Return the new cwnd size and ensures it does not exceed the Advertised - * Receiver Window size. It is wrap around safe. + * Return the new cwnd size and ensure it does not exceed the Advertised + * Receiver Window size. It is wrapped around safely. * For details refer to Section 3.1 of RFC5681 * * Return: new congestion window size in bytes @@ -254,7 +254,7 @@ static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, * @dst: the other endpoint MAC address to look for * * Look for a tp_vars object matching dst as end_point and return it after - * having incremented the refcounter. Return NULL is not found + * having increment the refcounter. Return NULL is not found * * Return: matching tp_vars or NULL when no tp_vars with @dst was found */ @@ -291,7 +291,7 @@ static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, * @session: session identifier * * Look for a tp_vars object matching dst as end_point, session as tp meter - * session and return it after having incremented the refcounter. Return NULL + * session and return it after having increment the refcounter. Return NULL * is not found * * Return: matching tp_vars or NULL when no tp_vars was found diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a9635c882fe0..98a0aaaf0d50 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -301,7 +301,7 @@ void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) * @vid: VLAN identifier * * Return: the number of originators advertising the given address/data - * (excluding ourself). + * (excluding our self). */ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) @@ -842,7 +842,7 @@ out: * table. In case of success the value is updated with the real amount of * reserved bytes * Allocate the needed amount of memory for the entire TT TVLV and write its - * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data + * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN served by the originator node. * * Return: the size of the allocated buffer or 0 in case of failure. @@ -1674,7 +1674,7 @@ out: * the function argument. * If a TT local entry exists for this non-mesh client remove it. * - * The caller must hold orig_node refcount. + * The caller must hold the orig_node refcount. * * Return: true if the new entry has been added, false otherwise */ @@ -1839,7 +1839,7 @@ out: * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be analyzed * - * This functon assumes the caller holds rcu_read_lock(). + * This function assumes the caller holds rcu_read_lock(). * Return: best originator list entry or NULL on errors. */ static struct batadv_tt_orig_list_entry * @@ -1887,7 +1887,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, * @tt_global_entry: global translation table entry to be printed * @seq: debugfs table seq_file struct * - * This functon assumes the caller holds rcu_read_lock(). + * This function assumes the caller holds rcu_read_lock(). */ static void batadv_tt_global_print_entry(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 0963a43ad996..6a23a566cde1 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -353,8 +353,8 @@ end: * @tvlv_value: tvlv content * @tvlv_value_len: tvlv content length * - * Return: success if handler was not found or the return value of the handler - * callback. + * Return: success if the handler was not found or the return value of the + * handler callback. */ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, struct batadv_tvlv_handler *tvlv_handler, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d152b8e81f61..cc151e1f23b2 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -455,8 +455,8 @@ struct batadv_orig_node { spinlock_t tt_buff_lock; /** - * @tt_lock: prevents from updating the table while reading it. Table - * update is made up by two operations (data structure update and + * @tt_lock: avoids concurrent read from and write to the table. Table + * update is made up of two operations (data structure update and * metadata -CRC/TTVN-recalculation) and they have to be executed * atomically in order to avoid another thread to read the * table/metadata between those. @@ -748,7 +748,7 @@ struct batadv_neigh_ifinfo { * struct batadv_bcast_duplist_entry - structure for LAN broadcast suppression */ struct batadv_bcast_duplist_entry { - /** @orig: mac address of orig node orginating the broadcast */ + /** @orig: mac address of orig node originating the broadcast */ u8 orig[ETH_ALEN]; /** @crc: crc32 checksum of broadcast payload */ @@ -1010,7 +1010,7 @@ struct batadv_priv_tt { /** * @commit_lock: prevents from executing a local TT commit while reading - * the local table. The local TT commit is made up by two operations + * the local table. The local TT commit is made up of two operations * (data structure update and metadata -CRC/TTVN- recalculation) and * they have to be executed atomically in order to avoid another thread * to read the table/metadata between those. @@ -1024,7 +1024,7 @@ struct batadv_priv_tt { #ifdef CONFIG_BATMAN_ADV_BLA /** - * struct batadv_priv_bla - per mesh interface bridge loope avoidance data + * struct batadv_priv_bla - per mesh interface bridge loop avoidance data */ struct batadv_priv_bla { /** @num_requests: number of bla requests in flight */ @@ -1718,7 +1718,7 @@ struct batadv_priv { spinlock_t softif_vlan_list_lock; #ifdef CONFIG_BATMAN_ADV_BLA - /** @bla: bridge loope avoidance data */ + /** @bla: bridge loop avoidance data */ struct batadv_priv_bla bla; #endif -- cgit From 3bda14d09dc5789a895ab02b7dcfcec19b0a65b3 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Mon, 1 Jun 2020 22:35:22 +0200 Subject: batman-adv: Introduce a configurable per interface hop penalty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some setups multiple hard interfaces with similar link qualities or throughput values are available. But people have expressed the desire to consider one of them as a backup only. Some creative solutions are currently in use: Such people are configuring multiple batman-adv mesh/soft interfaces, wire them together with some veth pairs and then tune the hop penalty to achieve an effect similar to a tunable per interface hop penalty. This patch introduces a new, configurable, per hard interface hop penalty to simplify such setups. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 17 +++++++++-------- net/batman-adv/bat_v_ogm.c | 13 ++++++++++--- net/batman-adv/hard-interface.c | 2 ++ net/batman-adv/netlink.c | 12 +++++++++++- net/batman-adv/types.h | 6 ++++++ 5 files changed, 38 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5b3a41983156..a4faf5f904d9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1075,10 +1075,10 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_neigh_ifinfo *neigh_ifinfo; u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; + unsigned int tq_iface_hop_penalty = BATADV_TQ_MAX_VALUE; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; - unsigned int tq_iface_penalty; bool ret = false; /* find corresponding one hop neighbor */ @@ -1157,31 +1157,32 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, inv_asym_penalty = BATADV_TQ_MAX_VALUE * neigh_rq_inv_cube; inv_asym_penalty /= neigh_rq_max_cube; tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty; + tq_iface_hop_penalty -= atomic_read(&if_incoming->hop_penalty); /* penalize if the OGM is forwarded on the same interface. WiFi * interfaces and other half duplex devices suffer from throughput * drops as they can't send and receive at the same time. */ - tq_iface_penalty = BATADV_TQ_MAX_VALUE; if (if_outgoing && if_incoming == if_outgoing && batadv_is_wifi_hardif(if_outgoing)) - tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, - bat_priv); + tq_iface_hop_penalty = batadv_hop_penalty(tq_iface_hop_penalty, + bat_priv); combined_tq = batadv_ogm_packet->tq * tq_own * tq_asym_penalty * - tq_iface_penalty; + tq_iface_hop_penalty; combined_tq /= BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE; batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", + "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_hop_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, - neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty, - batadv_ogm_packet->tq, if_incoming->net_dev->name, + neigh_rq_count, tq_own, tq_asym_penalty, + tq_iface_hop_penalty, batadv_ogm_packet->tq, + if_incoming->net_dev->name, if_outgoing ? if_outgoing->net_dev->name : "DEFAULT"); /* if link has the minimum required transmission quality diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 0d404f7bcd9f..0f8495b9eeb1 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -455,15 +455,17 @@ unlock: * @throughput: the current throughput * * Apply a penalty on the current throughput metric value based on the - * characteristic of the interface where the OGM has been received. The return - * value is computed as follows: + * characteristic of the interface where the OGM has been received. + * + * Initially the per hardif hop penalty is applied to the throughput. After + * that the return value is then computed as follows: * - throughput * 50% if the incoming and outgoing interface are the * same WiFi interface and the throughput is above * 1MBit/s * - throughput if the outgoing interface is the default * interface (i.e. this OGM is processed for the * internal table and not forwarded) - * - throughput * hop penalty otherwise + * - throughput * node hop penalty otherwise * * Return: the penalised throughput metric. */ @@ -472,9 +474,14 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, u32 throughput) { + int if_hop_penalty = atomic_read(&if_incoming->hop_penalty); int hop_penalty = atomic_read(&bat_priv->hop_penalty); int hop_penalty_max = BATADV_TQ_MAX_VALUE; + /* Apply per hardif hop penalty */ + throughput = throughput * (hop_penalty_max - if_hop_penalty) / + hop_penalty_max; + /* Don't apply hop penalty in default originator table. */ if (if_outgoing == BATADV_IF_DEFAULT) return throughput; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 53c27c67cc11..fa06b51c0144 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -939,6 +939,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + atomic_set(&hard_iface->hop_penalty, 0); + batadv_v_hardif_init(hard_iface); batadv_check_known_mac_addr(hard_iface->net_dev); diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index cfb00dfa468a..dc193618a761 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -826,6 +826,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg, goto nla_put_failure; } + if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY, + atomic_read(&hard_iface->hop_penalty))) + goto nla_put_failure; + #ifdef CONFIG_BATMAN_ADV_BATMAN_V if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL, atomic_read(&hard_iface->bat_v.elp_interval))) @@ -920,9 +924,15 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb, { struct batadv_hard_iface *hard_iface = info->user_ptr[1]; struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_HOP_PENALTY]) { + attr = info->attrs[BATADV_ATTR_HOP_PENALTY]; + + atomic_set(&hard_iface->hop_penalty, nla_get_u8(attr)); + } #ifdef CONFIG_BATMAN_ADV_BATMAN_V - struct nlattr *attr; if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) { attr = info->attrs[BATADV_ATTR_ELP_INTERVAL]; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index cc151e1f23b2..ed519efa3c36 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -208,6 +208,12 @@ struct batadv_hard_iface { /** @rcu: struct used for freeing in an RCU-safe manner */ struct rcu_head rcu; + /** + * @hop_penalty: penalty which will be applied to the tq-field + * of an OGM received via this interface + */ + atomic_t hop_penalty; + /** @bat_iv: per hard-interface B.A.T.M.A.N. IV data */ struct batadv_hard_iface_bat_iv bat_iv; -- cgit From d39dceca388ad0e4f748836806349ebe09282283 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jun 2020 19:29:59 +0200 Subject: mptcp: add __init annotation on setup functions Add the missing annotation in some setup-only functions. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm.c | 2 +- net/mptcp/pm_netlink.c | 2 +- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 10 +++++----- net/mptcp/subflow.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 977d9c8b1453..7de09fdd42a3 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -234,7 +234,7 @@ void mptcp_pm_close(struct mptcp_sock *msk) sock_put((struct sock *)msk); } -void mptcp_pm_init(void) +void __init mptcp_pm_init(void) { pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8); if (!pm_wq) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b78edf237ba0..c8820c4156e6 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -851,7 +851,7 @@ static struct pernet_operations mptcp_pm_pernet_ops = { .size = sizeof(struct pm_nl_pernet), }; -void mptcp_pm_nl_init(void) +void __init mptcp_pm_nl_init(void) { if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) panic("Failed to register MPTCP PM pernet subsystem.\n"); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3980fbb6f31e..9163a05b9e46 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2077,7 +2077,7 @@ static struct inet_protosw mptcp_protosw = { .flags = INET_PROTOSW_ICSK, }; -void mptcp_proto_init(void) +void __init mptcp_proto_init(void) { mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; @@ -2139,7 +2139,7 @@ static struct inet_protosw mptcp_v6_protosw = { .flags = INET_PROTOSW_ICSK, }; -int mptcp_proto_v6_init(void) +int __init mptcp_proto_v6_init(void) { int err; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 482f53aed30a..571d39a1a17c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -337,7 +337,7 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) int mptcp_is_enabled(struct net *net); bool mptcp_subflow_data_available(struct sock *sk); -void mptcp_subflow_init(void); +void __init mptcp_subflow_init(void); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, int ifindex, @@ -355,9 +355,9 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } -void mptcp_proto_init(void); +void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) -int mptcp_proto_v6_init(void); +int __init mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone(const struct sock *sk, @@ -394,7 +394,7 @@ static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); -void mptcp_pm_init(void); +void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_close(struct mptcp_sock *msk); void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side); @@ -428,7 +428,7 @@ bool mptcp_pm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_addr_info *saddr); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); -void mptcp_pm_nl_init(void); +void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_data_init(struct mptcp_sock *msk); void mptcp_pm_nl_fully_established(struct mptcp_sock *msk); void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3838a0b3a21f..c2389ba2d4ee 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1255,7 +1255,7 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) return 0; } -void mptcp_subflow_init(void) +void __init mptcp_subflow_init(void) { subflow_request_sock_ops = tcp_request_sock_ops; if (subflow_ops_init(&subflow_request_sock_ops) != 0) -- cgit From 2c5ebd001d4f0c64a2dfda94eb1d9b31a8863c8d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jun 2020 19:30:00 +0200 Subject: mptcp: refactor token container Replace the radix tree with a hash table allocated at boot time. The radix tree has some shortcoming: a single lock is contented by all the mptcp operation, the lookup currently use such lock, and traversing all the items would require a lock, too. With hash table instead we trade a little memory to address all the above - a per bucket lock is used. To hash the MPTCP sockets, we re-use the msk' sk_node entry: the MPTCP sockets are never hashed by the stack. Replace the existing hash proto callbacks with a dummy implementation, annotating the above constraint. Additionally refactor the token creation to code to: - limit the number of consecutive attempts to a fixed maximum. Hitting a hash bucket with a long chain is considered a failed attempt - accept() no longer can fail to token management. - if token creation fails at connect() time, we do fallback to TCP (before the connection was closed) v1 -> v2: - fix "no newline at end of file" - Jakub Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 45 +++++---- net/mptcp/protocol.h | 14 ++- net/mptcp/subflow.c | 19 ++-- net/mptcp/token.c | 271 ++++++++++++++++++++++++++++++++++++--------------- 4 files changed, 236 insertions(+), 113 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9163a05b9e46..be09fd525f8f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1448,20 +1448,6 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->token = subflow_req->token; msk->subflow = NULL; - if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { - nsk->sk_state = TCP_CLOSE; - bh_unlock_sock(nsk); - - /* we can't call into mptcp_close() here - possible BH context - * free the sock directly. - * sk_clone_lock() sets nsk refcnt to two, hence call sk_free() - * too. - */ - sk_common_release(nsk); - sk_free(nsk); - return NULL; - } - msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); if (mp_opt->mp_capable) { @@ -1547,7 +1533,7 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - mptcp_token_destroy(msk->token); + mptcp_token_destroy(msk); if (msk->cached_ext) __skb_ext_put(msk->cached_ext); @@ -1636,6 +1622,20 @@ static void mptcp_release_cb(struct sock *sk) } } +static int mptcp_hash(struct sock *sk) +{ + /* should never be called, + * we hash the TCP subflows not the master socket + */ + WARN_ON_ONCE(1); + return 0; +} + +static void mptcp_unhash(struct sock *sk) +{ + /* called from sk_common_release(), but nothing to do here */ +} + static int mptcp_get_port(struct sock *sk, unsigned short snum) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1679,7 +1679,6 @@ void mptcp_finish_connect(struct sock *ssk) */ WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->token, subflow->token); WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->ack_seq, ack_seq); WRITE_ONCE(msk->can_ack, 1); @@ -1761,8 +1760,8 @@ static struct proto mptcp_prot = { .sendmsg = mptcp_sendmsg, .recvmsg = mptcp_recvmsg, .release_cb = mptcp_release_cb, - .hash = inet_hash, - .unhash = inet_unhash, + .hash = mptcp_hash, + .unhash = mptcp_unhash, .get_port = mptcp_get_port, .sockets_allocated = &mptcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, @@ -1771,6 +1770,7 @@ static struct proto mptcp_prot = { .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_mem = sysctl_tcp_mem, .obj_size = sizeof(struct mptcp_sock), + .slab_flags = SLAB_TYPESAFE_BY_RCU, .no_autobind = true, }; @@ -1800,6 +1800,7 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct mptcp_sock *msk = mptcp_sk(sock->sk); + struct mptcp_subflow_context *subflow; struct socket *ssock; int err; @@ -1812,19 +1813,23 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto do_connect; } + mptcp_token_destroy(msk); ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); if (IS_ERR(ssock)) { err = PTR_ERR(ssock); goto unlock; } + subflow = mptcp_subflow_ctx(ssock->sk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of * TCP option space. */ if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) - mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0; + subflow->request_mptcp = 0; #endif + if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) + subflow->request_mptcp = 0; do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); @@ -1888,6 +1893,7 @@ static int mptcp_listen(struct socket *sock, int backlog) pr_debug("msk=%p", msk); lock_sock(sock->sk); + mptcp_token_destroy(msk); ssock = __mptcp_socket_create(msk, TCP_LISTEN); if (IS_ERR(ssock)) { err = PTR_ERR(ssock); @@ -2086,6 +2092,7 @@ void __init mptcp_proto_init(void) mptcp_subflow_init(); mptcp_pm_init(); + mptcp_token_init(); if (proto_register(&mptcp_prot, 1) != 0) panic("Failed to register MPTCP proto.\n"); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 571d39a1a17c..c05552e5fa23 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -250,6 +250,7 @@ struct mptcp_subflow_request_sock { u32 local_nonce; u32 remote_nonce; struct mptcp_sock *msk; + struct hlist_nulls_node token_node; }; static inline struct mptcp_subflow_request_sock * @@ -372,12 +373,19 @@ bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); +void __init mptcp_token_init(void); +static inline void mptcp_token_init_request(struct request_sock *req) +{ + mptcp_subflow_rsk(req)->token_node.pprev = NULL; +} + int mptcp_token_new_request(struct request_sock *req); -void mptcp_token_destroy_request(u32 token); +void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *sk); -int mptcp_token_new_accept(u32 token, struct sock *conn); +void mptcp_token_accept(struct mptcp_subflow_request_sock *r, + struct mptcp_sock *msk); struct mptcp_sock *mptcp_token_get_sock(u32 token); -void mptcp_token_destroy(u32 token); +void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c2389ba2d4ee..102db8c88e97 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -32,12 +32,9 @@ static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, static int subflow_rebuild_header(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - int local_id, err = 0; + int local_id; - if (subflow->request_mptcp && !subflow->token) { - pr_debug("subflow=%p", sk); - err = mptcp_token_new_connect(sk); - } else if (subflow->request_join && !subflow->local_nonce) { + if (subflow->request_join && !subflow->local_nonce) { struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn; pr_debug("subflow=%p", sk); @@ -57,9 +54,6 @@ static int subflow_rebuild_header(struct sock *sk) } out: - if (err) - return err; - return subflow->icsk_af_ops->rebuild_header(sk); } @@ -72,8 +66,7 @@ static void subflow_req_destructor(struct request_sock *req) if (subflow_req->msk) sock_put((struct sock *)subflow_req->msk); - if (subflow_req->mp_capable) - mptcp_token_destroy_request(subflow_req->token); + mptcp_token_destroy_request(req); tcp_request_sock_ops.destructor(req); } @@ -135,6 +128,7 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 0; subflow_req->mp_join = 0; subflow_req->msk = NULL; + mptcp_token_init_request(req); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -250,7 +244,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->remote_nonce = mp_opt.nonce; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); - } else if (subflow->request_mptcp) { + } else { tp->is_mptcp = 0; } @@ -386,7 +380,7 @@ static void mptcp_sock_destruct(struct sock *sk) sock_orphan(sk); } - mptcp_token_destroy(mptcp_sk(sk)->token); + mptcp_token_destroy(mptcp_sk(sk)); inet_sock_destruct(sk); } @@ -505,6 +499,7 @@ create_child: */ new_msk->sk_destruct = mptcp_sock_destruct; mptcp_pm_new_connection(mptcp_sk(new_msk), 1); + mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); ctx->conn = new_msk; new_msk = NULL; diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 33352dd99d4d..9c0771774815 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -33,10 +33,55 @@ #include #include "protocol.h" -static RADIX_TREE(token_tree, GFP_ATOMIC); -static RADIX_TREE(token_req_tree, GFP_ATOMIC); -static DEFINE_SPINLOCK(token_tree_lock); -static int token_used __read_mostly; +#define TOKEN_MAX_RETRIES 4 +#define TOKEN_MAX_CHAIN_LEN 4 + +struct token_bucket { + spinlock_t lock; + int chain_len; + struct hlist_nulls_head req_chain; + struct hlist_nulls_head msk_chain; +}; + +static struct token_bucket *token_hash __read_mostly; +static unsigned int token_mask __read_mostly; + +static struct token_bucket *token_bucket(u32 token) +{ + return &token_hash[token & token_mask]; +} + +/* called with bucket lock held */ +static struct mptcp_subflow_request_sock * +__token_lookup_req(struct token_bucket *t, u32 token) +{ + struct mptcp_subflow_request_sock *req; + struct hlist_nulls_node *pos; + + hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node) + if (req->token == token) + return req; + return NULL; +} + +/* called with bucket lock held */ +static struct mptcp_sock * +__token_lookup_msk(struct token_bucket *t, u32 token) +{ + struct hlist_nulls_node *pos; + struct sock *sk; + + sk_nulls_for_each_rcu(sk, pos, &t->msk_chain) + if (mptcp_sk(sk)->token == token) + return mptcp_sk(sk); + return NULL; +} + +static bool __token_bucket_busy(struct token_bucket *t, u32 token) +{ + return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN || + __token_lookup_req(t, token) || __token_lookup_msk(t, token); +} /** * mptcp_token_new_request - create new key/idsn/token for subflow_request @@ -52,30 +97,32 @@ static int token_used __read_mostly; int mptcp_token_new_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - int err; - - while (1) { - u32 token; - - mptcp_crypto_key_gen_sha(&subflow_req->local_key, - &subflow_req->token, - &subflow_req->idsn); - pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", - req, subflow_req->local_key, subflow_req->token, - subflow_req->idsn); - - token = subflow_req->token; - spin_lock_bh(&token_tree_lock); - if (!radix_tree_lookup(&token_req_tree, token) && - !radix_tree_lookup(&token_tree, token)) - break; - spin_unlock_bh(&token_tree_lock); + int retries = TOKEN_MAX_RETRIES; + struct token_bucket *bucket; + u32 token; + +again: + mptcp_crypto_key_gen_sha(&subflow_req->local_key, + &subflow_req->token, + &subflow_req->idsn); + pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", + req, subflow_req->local_key, subflow_req->token, + subflow_req->idsn); + + token = subflow_req->token; + bucket = token_bucket(token); + spin_lock_bh(&bucket->lock); + if (__token_bucket_busy(bucket, token)) { + spin_unlock_bh(&bucket->lock); + if (!--retries) + return -EBUSY; + goto again; } - err = radix_tree_insert(&token_req_tree, - subflow_req->token, &token_used); - spin_unlock_bh(&token_tree_lock); - return err; + hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain); + bucket->chain_len++; + spin_unlock_bh(&bucket->lock); + return 0; } /** @@ -97,48 +144,56 @@ int mptcp_token_new_request(struct request_sock *req) int mptcp_token_new_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *mptcp_sock = subflow->conn; - int err; - - while (1) { - u32 token; + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + int retries = TOKEN_MAX_RETRIES; + struct token_bucket *bucket; - mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token, - &subflow->idsn); + pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n", + sk, subflow->local_key, subflow->token, subflow->idsn); - pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n", - sk, subflow->local_key, subflow->token, subflow->idsn); +again: + mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token, + &subflow->idsn); - token = subflow->token; - spin_lock_bh(&token_tree_lock); - if (!radix_tree_lookup(&token_req_tree, token) && - !radix_tree_lookup(&token_tree, token)) - break; - spin_unlock_bh(&token_tree_lock); + bucket = token_bucket(subflow->token); + spin_lock_bh(&bucket->lock); + if (__token_bucket_busy(bucket, subflow->token)) { + spin_unlock_bh(&bucket->lock); + if (!--retries) + return -EBUSY; + goto again; } - err = radix_tree_insert(&token_tree, subflow->token, mptcp_sock); - spin_unlock_bh(&token_tree_lock); - return err; + WRITE_ONCE(msk->token, subflow->token); + __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); + bucket->chain_len++; + spin_unlock_bh(&bucket->lock); + return 0; } /** - * mptcp_token_new_accept - insert token for later processing - * @token: the token to insert to the tree - * @conn: the just cloned socket linked to the new connection + * mptcp_token_accept - replace a req sk with full sock in token hash + * @req: the request socket to be removed + * @msk: the just cloned socket linked to the new connection * * Called when a SYN packet creates a new logical connection, i.e. * is not a join request. */ -int mptcp_token_new_accept(u32 token, struct sock *conn) +void mptcp_token_accept(struct mptcp_subflow_request_sock *req, + struct mptcp_sock *msk) { - int err; + struct mptcp_subflow_request_sock *pos; + struct token_bucket *bucket; - spin_lock_bh(&token_tree_lock); - err = radix_tree_insert(&token_tree, token, conn); - spin_unlock_bh(&token_tree_lock); + bucket = token_bucket(req->token); + spin_lock_bh(&bucket->lock); - return err; + /* pedantic lookup check for the moved token */ + pos = __token_lookup_req(bucket, req->token); + if (!WARN_ON_ONCE(pos != req)) + hlist_nulls_del_init_rcu(&req->token_node); + __sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain); + spin_unlock_bh(&bucket->lock); } /** @@ -152,45 +207,103 @@ int mptcp_token_new_accept(u32 token, struct sock *conn) */ struct mptcp_sock *mptcp_token_get_sock(u32 token) { - struct sock *conn; - - spin_lock_bh(&token_tree_lock); - conn = radix_tree_lookup(&token_tree, token); - if (conn) { - /* token still reserved? */ - if (conn == (struct sock *)&token_used) - conn = NULL; - else - sock_hold(conn); + struct hlist_nulls_node *pos; + struct token_bucket *bucket; + struct mptcp_sock *msk; + struct sock *sk; + + rcu_read_lock(); + bucket = token_bucket(token); + +again: + sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { + msk = mptcp_sk(sk); + if (READ_ONCE(msk->token) != token) + continue; + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto not_found; + if (READ_ONCE(msk->token) != token) { + sock_put(sk); + goto again; + } + goto found; } - spin_unlock_bh(&token_tree_lock); + if (get_nulls_value(pos) != (token & token_mask)) + goto again; + +not_found: + msk = NULL; - return mptcp_sk(conn); +found: + rcu_read_unlock(); + return msk; } /** * mptcp_token_destroy_request - remove mptcp connection/token - * @token: token of mptcp connection to remove + * @req: mptcp request socket dropping the token * - * Remove not-yet-fully-established incoming connection identified - * by @token. + * Remove the token associated to @req. */ -void mptcp_token_destroy_request(u32 token) +void mptcp_token_destroy_request(struct request_sock *req) { - spin_lock_bh(&token_tree_lock); - radix_tree_delete(&token_req_tree, token); - spin_unlock_bh(&token_tree_lock); + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); + struct mptcp_subflow_request_sock *pos; + struct token_bucket *bucket; + + if (hlist_nulls_unhashed(&subflow_req->token_node)) + return; + + bucket = token_bucket(subflow_req->token); + spin_lock_bh(&bucket->lock); + pos = __token_lookup_req(bucket, subflow_req->token); + if (!WARN_ON_ONCE(pos != subflow_req)) { + hlist_nulls_del_init_rcu(&pos->token_node); + bucket->chain_len--; + } + spin_unlock_bh(&bucket->lock); } /** * mptcp_token_destroy - remove mptcp connection/token - * @token: token of mptcp connection to remove + * @msk: mptcp connection dropping the token * - * Remove the connection identified by @token. + * Remove the token associated to @msk */ -void mptcp_token_destroy(u32 token) +void mptcp_token_destroy(struct mptcp_sock *msk) { - spin_lock_bh(&token_tree_lock); - radix_tree_delete(&token_tree, token); - spin_unlock_bh(&token_tree_lock); + struct token_bucket *bucket; + struct mptcp_sock *pos; + + if (sk_unhashed((struct sock *)msk)) + return; + + bucket = token_bucket(msk->token); + spin_lock_bh(&bucket->lock); + pos = __token_lookup_msk(bucket, msk->token); + if (!WARN_ON_ONCE(pos != msk)) { + __sk_nulls_del_node_init_rcu((struct sock *)pos); + bucket->chain_len--; + } + spin_unlock_bh(&bucket->lock); +} + +void __init mptcp_token_init(void) +{ + int i; + + token_hash = alloc_large_system_hash("MPTCP token", + sizeof(struct token_bucket), + 0, + 20,/* one slot per 1MB of memory */ + 0, + NULL, + &token_mask, + 0, + 64 * 1024); + for (i = 0; i < token_mask + 1; ++i) { + INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i); + INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i); + spin_lock_init(&token_hash[i].lock); + } } -- cgit From a00a582203dbc43ea311a50e979038fc0c8ee19f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jun 2020 19:30:01 +0200 Subject: mptcp: move crypto test to KUNIT currently MPTCP uses a custom hook to executed unit tests at boot time. Let's use the KUNIT framework instead. Additionally move the relevant code to a separate file and export the function needed by the test when self-tests are build as a module. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/Kconfig | 20 +++++++++----- net/mptcp/Makefile | 3 +++ net/mptcp/crypto.c | 63 ++----------------------------------------- net/mptcp/crypto_test.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 67 deletions(-) create mode 100644 net/mptcp/crypto_test.c (limited to 'net') diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index a9ed3bf1d93f..d7d5f9349366 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -18,12 +18,20 @@ config MPTCP_IPV6 select IPV6 default y -config MPTCP_HMAC_TEST - bool "Tests for MPTCP HMAC implementation" +endif + +config MPTCP_KUNIT_TESTS + tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS + select MPTCP + depends on KUNIT + default KUNIT_ALL_TESTS help - This option enable boot time self-test for the HMAC implementation - used by the MPTCP code + Currently covers the MPTCP crypto helpers. + Only useful for kernel devs running KUnit test harness and are not + for inclusion into a production build. - Say N if you are unsure. + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. -endif diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index baa0640527c7..f9039804207b 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -3,3 +3,6 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o + +mptcp_crypto_test-objs := crypto_test.o +obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o \ No newline at end of file diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index 3d980713a9e2..6c4ea979dfd4 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -87,65 +87,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_final(&state, (u8 *)hmac); } -#ifdef CONFIG_MPTCP_HMAC_TEST -struct test_cast { - char *key; - char *msg; - char *result; -}; - -/* we can't reuse RFC 4231 test vectors, as we have constraint on the - * input and key size. - */ -static struct test_cast tests[] = { - { - .key = "0b0b0b0b0b0b0b0b", - .msg = "48692054", - .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", - }, - { - .key = "aaaaaaaaaaaaaaaa", - .msg = "dddddddd", - .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", - }, - { - .key = "0102030405060708", - .msg = "cdcdcdcd", - .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", - }, -}; - -static int __init test_mptcp_crypto(void) -{ - char hmac[32], hmac_hex[65]; - u32 nonce1, nonce2; - u64 key1, key2; - u8 msg[8]; - int i, j; - - for (i = 0; i < ARRAY_SIZE(tests); ++i) { - /* mptcp hmap will convert to be before computing the hmac */ - key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0])); - key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8])); - nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0])); - nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4])); - - put_unaligned_be32(nonce1, &msg[0]); - put_unaligned_be32(nonce2, &msg[4]); - - mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); - for (j = 0; j < 32; ++j) - sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); - hmac_hex[64] = 0; - - if (memcmp(hmac_hex, tests[i].result, 64)) - pr_err("test %d failed, got %s expected %s", i, - hmac_hex, tests[i].result); - else - pr_info("test %d [ ok ]", i); - } - return 0; -} - -late_initcall(test_mptcp_crypto); +#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS) +EXPORT_SYMBOL_GPL(mptcp_crypto_hmac_sha); #endif diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c new file mode 100644 index 000000000000..017248dea038 --- /dev/null +++ b/net/mptcp/crypto_test.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "protocol.h" + +struct test_case { + char *key; + char *msg; + char *result; +}; + +/* we can't reuse RFC 4231 test vectors, as we have constraint on the + * input and key size. + */ +static struct test_case tests[] = { + { + .key = "0b0b0b0b0b0b0b0b", + .msg = "48692054", + .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", + }, + { + .key = "aaaaaaaaaaaaaaaa", + .msg = "dddddddd", + .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", + }, + { + .key = "0102030405060708", + .msg = "cdcdcdcd", + .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", + }, +}; + +static void mptcp_crypto_test_basic(struct kunit *test) +{ + char hmac[32], hmac_hex[65]; + u32 nonce1, nonce2; + u64 key1, key2; + u8 msg[8]; + int i, j; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + /* mptcp hmap will convert to be before computing the hmac */ + key1 = be64_to_cpu(*((__be64 *)&tests[i].key[0])); + key2 = be64_to_cpu(*((__be64 *)&tests[i].key[8])); + nonce1 = be32_to_cpu(*((__be32 *)&tests[i].msg[0])); + nonce2 = be32_to_cpu(*((__be32 *)&tests[i].msg[4])); + + put_unaligned_be32(nonce1, &msg[0]); + put_unaligned_be32(nonce2, &msg[4]); + + mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); + for (j = 0; j < 32; ++j) + sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); + hmac_hex[64] = 0; + + KUNIT_EXPECT_STREQ(test, &hmac_hex[0], tests[i].result); + } +} + +static struct kunit_case mptcp_crypto_test_cases[] = { + KUNIT_CASE(mptcp_crypto_test_basic), + {} +}; + +static struct kunit_suite mptcp_crypto_suite = { + .name = "mptcp-crypto", + .test_cases = mptcp_crypto_test_cases, +}; + +kunit_test_suite(mptcp_crypto_suite); + +MODULE_LICENSE("GPL"); -- cgit From a8ee9c9b58199be2692f7eb761d7a01749f79655 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jun 2020 19:30:02 +0200 Subject: mptcp: introduce token KUNIT self-tests Unit tests for the internal MPTCP token APIs, using KUNIT v1 -> v2: - use the correct RCU annotation when initializing icsk ulp - fix a few checkpatch issues Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/Kconfig | 2 +- net/mptcp/Makefile | 3 +- net/mptcp/token.c | 9 ++++ net/mptcp/token_test.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 net/mptcp/token_test.c (limited to 'net') diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index d7d5f9349366..af84fce70bb0 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -26,7 +26,7 @@ config MPTCP_KUNIT_TESTS depends on KUNIT default KUNIT_ALL_TESTS help - Currently covers the MPTCP crypto helpers. + Currently covers the MPTCP crypto and token helpers. Only useful for kernel devs running KUnit test harness and are not for inclusion into a production build. diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index f9039804207b..c53f9b845523 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -5,4 +5,5 @@ mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o mptcp_crypto_test-objs := crypto_test.o -obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o \ No newline at end of file +mptcp_token_test-objs := token_test.o +obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 9c0771774815..66a4990bd897 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -307,3 +307,12 @@ void __init mptcp_token_init(void) spin_lock_init(&token_hash[i].lock); } } + +#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS) +EXPORT_SYMBOL_GPL(mptcp_token_new_request); +EXPORT_SYMBOL_GPL(mptcp_token_new_connect); +EXPORT_SYMBOL_GPL(mptcp_token_accept); +EXPORT_SYMBOL_GPL(mptcp_token_get_sock); +EXPORT_SYMBOL_GPL(mptcp_token_destroy_request); +EXPORT_SYMBOL_GPL(mptcp_token_destroy); +#endif diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c new file mode 100644 index 000000000000..e1bd6f0a0676 --- /dev/null +++ b/net/mptcp/token_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "protocol.h" + +static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test) +{ + struct mptcp_subflow_request_sock *req; + + req = kunit_kzalloc(test, sizeof(struct mptcp_subflow_request_sock), + GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req); + mptcp_token_init_request((struct request_sock *)req); + return req; +} + +static void mptcp_token_test_req_basic(struct kunit *test) +{ + struct mptcp_subflow_request_sock *req = build_req_sock(test); + struct mptcp_sock *null_msk = NULL; + + KUNIT_ASSERT_EQ(test, 0, + mptcp_token_new_request((struct request_sock *)req)); + KUNIT_EXPECT_NE(test, 0, (int)req->token); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token)); + + /* cleanup */ + mptcp_token_destroy_request((struct request_sock *)req); +} + +static struct inet_connection_sock *build_icsk(struct kunit *test) +{ + struct inet_connection_sock *icsk; + + icsk = kunit_kzalloc(test, sizeof(struct inet_connection_sock), + GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, icsk); + return icsk; +} + +static struct mptcp_subflow_context *build_ctx(struct kunit *test) +{ + struct mptcp_subflow_context *ctx; + + ctx = kunit_kzalloc(test, sizeof(struct mptcp_subflow_context), + GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ctx); + return ctx; +} + +static struct mptcp_sock *build_msk(struct kunit *test) +{ + struct mptcp_sock *msk; + + msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk); + refcount_set(&((struct sock *)msk)->sk_refcnt, 1); + return msk; +} + +static void mptcp_token_test_msk_basic(struct kunit *test) +{ + struct inet_connection_sock *icsk = build_icsk(test); + struct mptcp_subflow_context *ctx = build_ctx(test); + struct mptcp_sock *msk = build_msk(test); + struct mptcp_sock *null_msk = NULL; + struct sock *sk; + + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); + ctx->conn = (struct sock *)msk; + sk = (struct sock *)msk; + + KUNIT_ASSERT_EQ(test, 0, + mptcp_token_new_connect((struct sock *)icsk)); + KUNIT_EXPECT_NE(test, 0, (int)ctx->token); + KUNIT_EXPECT_EQ(test, ctx->token, msk->token); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token)); + KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt)); + + mptcp_token_destroy(msk); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token)); +} + +static void mptcp_token_test_accept(struct kunit *test) +{ + struct mptcp_subflow_request_sock *req = build_req_sock(test); + struct mptcp_sock *msk = build_msk(test); + + KUNIT_ASSERT_EQ(test, 0, + mptcp_token_new_request((struct request_sock *)req)); + msk->token = req->token; + mptcp_token_accept(req, msk); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); + + /* this is now a no-op */ + mptcp_token_destroy_request((struct request_sock *)req); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); + + /* cleanup */ + mptcp_token_destroy(msk); +} + +static void mptcp_token_test_destroyed(struct kunit *test) +{ + struct mptcp_subflow_request_sock *req = build_req_sock(test); + struct mptcp_sock *msk = build_msk(test); + struct mptcp_sock *null_msk = NULL; + struct sock *sk; + + sk = (struct sock *)msk; + + KUNIT_ASSERT_EQ(test, 0, + mptcp_token_new_request((struct request_sock *)req)); + msk->token = req->token; + mptcp_token_accept(req, msk); + + /* simulate race on removal */ + refcount_set(&sk->sk_refcnt, 0); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token)); + + /* cleanup */ + mptcp_token_destroy(msk); +} + +static struct kunit_case mptcp_token_test_cases[] = { + KUNIT_CASE(mptcp_token_test_req_basic), + KUNIT_CASE(mptcp_token_test_msk_basic), + KUNIT_CASE(mptcp_token_test_accept), + KUNIT_CASE(mptcp_token_test_destroyed), + {} +}; + +static struct kunit_suite mptcp_token_suite = { + .name = "mptcp-token", + .test_cases = mptcp_token_test_cases, +}; + +kunit_test_suite(mptcp_token_suite); + +MODULE_LICENSE("GPL"); -- cgit From acb5a07aaf2723cd273a4089e62611a414fb1c35 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Mon, 8 Jun 2020 12:42:52 +0300 Subject: Revert "net/tls: Add force_resync for driver resync" This reverts commit b3ae2459f89773adcbf16fef4b68deaaa3be1929. Revert the force resync API. Not in use. To be replaced by a better async resync API downstream. Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- net/tls/tls_device.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0e55f8365ce2..a562ebaaa33c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -694,11 +694,10 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx; - bool is_req_pending, is_force_resync; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; + u32 sock_data, is_req_pending; struct tls_prot_info *prot; s64 resync_req; - u32 sock_data; u32 req_seq; if (tls_ctx->rx_conf != TLS_HW) @@ -713,11 +712,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) resync_req = atomic64_read(&rx_ctx->resync_req); req_seq = resync_req >> 32; seq += TLS_HEADER_SIZE - 1; - is_req_pending = resync_req & RESYNC_REQ; - is_force_resync = resync_req & RESYNC_REQ_FORCE; + is_req_pending = resync_req; - if (likely(!is_req_pending) || - (!is_force_resync && req_seq != seq) || + if (likely(!is_req_pending) || req_seq != seq || !atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) return; break; -- cgit From ed9b7646b06a2ed2450dd9437fc7d1ad2783140c Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Mon, 8 Jun 2020 19:11:38 +0300 Subject: net/tls: Add asynchronous resync This patch adds support for asynchronous resynchronization in tls_device. Async resync follows two distinct stages: 1. The NIC driver indicates that it would like to resync on some TLS record within the received packet (P), but the driver does not know (yet) which of the TLS records within the packet. At this stage, the NIC driver will query the device to find the exact TCP sequence for resync (tcpsn), however, the driver does not wait for the device to provide the response. 2. Eventually, the device responds, and the driver provides the tcpsn within the resync packet to KTLS. Now, KTLS can check the tcpsn against any processed TLS records within packet P, and also against any record that is processed in the future within packet P. The asynchronous resync path simplifies the device driver, as it can save bits on the packet completion (32-bit TCP sequence), and pass this information on an asynchronous command instead. Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- net/tls/tls_device.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'net') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a562ebaaa33c..18fa6067bb7f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -690,6 +690,47 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } +static bool +tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, + s64 resync_req, u32 *seq) +{ + u32 is_async = resync_req & RESYNC_REQ_ASYNC; + u32 req_seq = resync_req >> 32; + u32 req_end = req_seq + ((resync_req >> 16) & 0xffff); + + if (is_async) { + /* asynchronous stage: log all headers seq such that + * req_seq <= seq <= end_seq, and wait for real resync request + */ + if (between(*seq, req_seq, req_end) && + resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX) + resync_async->log[resync_async->loglen++] = *seq; + + return false; + } + + /* synchronous stage: check against the logged entries and + * proceed to check the next entries if no match was found + */ + while (resync_async->loglen) { + if (req_seq == resync_async->log[resync_async->loglen - 1] && + atomic64_try_cmpxchg(&resync_async->req, + &resync_req, 0)) { + resync_async->loglen = 0; + *seq = req_seq; + return true; + } + resync_async->loglen--; + } + + if (req_seq == *seq && + atomic64_try_cmpxchg(&resync_async->req, + &resync_req, 0)) + return true; + + return false; +} + void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -736,6 +777,16 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) seq += rcd_len; tls_bigint_increment(rcd_sn, prot->rec_seq_size); break; + case TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ_ASYNC: + resync_req = atomic64_read(&rx_ctx->resync_async->req); + is_req_pending = resync_req; + if (likely(!is_req_pending)) + return; + + if (!tls_device_rx_resync_async(rx_ctx->resync_async, + resync_req, &seq)) + return; + break; } tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn); -- cgit From fdb7eb21ddd3cc07b8120b6f5cc0b279a0ed198e Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 26 Jun 2020 21:05:32 -0700 Subject: tcp: stamp SCM_TSTAMP_ACK later in tcp_clean_rtx_queue() Currently tp->delivered is updated with sacked packets but not cumulatively acked when SCP_TSTAMP_ACK is timestamped. This patch moves a tcp_ack_tstamp() call in tcp_clean_rtx_queue() to later in the loop so that when a skb is fully acked OPT_STATS of SCM_TSTAMP_ACK will include the current skb in the delivered count. When not fully acked tcp_ack_tstamp() is a no-op and there is no change in behavior. Signed-off-by: Yousuk Seung Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f3a0eb139b76..2a683e785cca 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3078,8 +3078,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, u8 sacked = scb->sacked; u32 acked_pcount; - tcp_ack_tstamp(sk, skb, prior_snd_una); - /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { if (tcp_skb_pcount(skb) == 1 || @@ -3143,6 +3141,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (!fully_acked) break; + tcp_ack_tstamp(sk, skb, prior_snd_una); + next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; -- cgit From c634e34f6ebfb75259e6ce467523fd3adf30d3d2 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 26 Jun 2020 21:05:33 -0700 Subject: tcp: add ece_ack flag to reno sack functions Pass a boolean flag that tells the ECE state of the current ack to reno sack functions. This is pure refactor for future patches to improve tracking delivered counts. Signed-off-by: Yousuk Seung Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a683e785cca..09bed29e3ef4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1893,7 +1893,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend) /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct sock *sk, int num_dupack) +static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack) { if (num_dupack) { struct tcp_sock *tp = tcp_sk(sk); @@ -1911,7 +1911,7 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack) /* Account for ACK, ACKing some data in Reno Recovery phase. */ -static void tcp_remove_reno_sacks(struct sock *sk, int acked) +static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack) { struct tcp_sock *tp = tcp_sk(sk); @@ -2697,7 +2697,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack, * delivered. Lower inflight to clock out (re)tranmissions. */ if (after(tp->snd_nxt, tp->high_seq) && num_dupack) - tcp_add_reno_sack(sk, num_dupack); + tcp_add_reno_sack(sk, num_dupack, flag & FLAG_ECE); else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } @@ -2779,6 +2779,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int fast_rexmit = 0, flag = *ack_flag; + bool ece_ack = flag & FLAG_ECE; bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) && tcp_force_fast_retransmit(sk)); @@ -2787,7 +2788,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ - if (flag & FLAG_ECE) + if (ece_ack) tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ @@ -2828,7 +2829,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp)) - tcp_add_reno_sack(sk, num_dupack); + tcp_add_reno_sack(sk, num_dupack, ece_ack); } else { if (tcp_try_undo_partial(sk, prior_snd_una)) return; @@ -2853,7 +2854,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); - tcp_add_reno_sack(sk, num_dupack); + tcp_add_reno_sack(sk, num_dupack, ece_ack); } if (icsk->icsk_ca_state <= TCP_CA_Disorder) @@ -2877,7 +2878,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, } /* Otherwise enter Recovery state */ - tcp_enter_recovery(sk, (flag & FLAG_ECE)); + tcp_enter_recovery(sk, ece_ack); fast_rexmit = 1; } @@ -3053,7 +3054,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, u32 prior_snd_una, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, bool ece_ack) { const struct inet_connection_sock *icsk = inet_csk(sk); u64 first_ackt, last_ackt; @@ -3191,7 +3192,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, } if (tcp_is_reno(tp)) { - tcp_remove_reno_sacks(sk, pkts_acked); + tcp_remove_reno_sacks(sk, pkts_acked, ece_ack); /* If any of the cumulatively ACKed segments was * retransmitted, non-SACK case cannot confirm that @@ -3685,7 +3686,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state); + flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state, + flag & FLAG_ECE); tcp_rack_update_reo_wnd(sk, &rs); -- cgit From f00394ce6054e54319aefa51e4c495f9ebeb8669 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 26 Jun 2020 21:05:34 -0700 Subject: tcp: count sacked packets in tcp_sacktag_state Add sack_delivered to tcp_sacktag_state and count the number of sacked and dsacked packets. This is pure refactor for future patches to improve tracking delivered counts. Signed-off-by: Yousuk Seung Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 09bed29e3ef4..db61ea597e39 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1138,6 +1138,7 @@ struct tcp_sacktag_state { struct rate_sample *rate; int flag; unsigned int mss_now; + u32 sack_delivered; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1259,6 +1260,7 @@ static u8 tcp_sacktag_one(struct sock *sk, state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; tp->delivered += pcount; /* Out-of-order packets delivered */ + state->sack_delivered += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ if (tp->lost_skb_hint && @@ -1685,6 +1687,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; tp->delivered++; /* A spurious retransmission is delivered */ + state->sack_delivered++; } /* Eliminate too old ACKs, but take into @@ -3586,6 +3589,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) sack_state.first_sackt = 0; sack_state.rate = &rs; + sack_state.sack_delivered = 0; /* We very likely will need to access rtx queue. */ prefetch(sk->tcp_rtx_queue.rb_node); -- cgit From 082d4fa980b07b1cc602305e9cc0815d19663ed3 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Fri, 26 Jun 2020 21:05:35 -0700 Subject: tcp: update delivered_ce with delivered Currently tp->delivered is updated in various places in tcp_ack() but tp->delivered_ce is updated once at the end. As a result two counts in OPT_STATS of SCM_TSTAMP_ACK timestamps generated in tcp_ack() may not be in sync. This patch updates both counts at the same in tcp_ack(). Signed-off-by: Yousuk Seung Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index db61ea597e39..8479b84f0a7f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -962,6 +962,15 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) } } +/* Updates the delivered and delivered_ce counts */ +static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered, + bool ece_ack) +{ + tp->delivered += delivered; + if (ece_ack) + tp->delivered_ce += delivered; +} + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -1259,7 +1268,7 @@ static u8 tcp_sacktag_one(struct sock *sk, sacked |= TCPCB_SACKED_ACKED; state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; - tp->delivered += pcount; /* Out-of-order packets delivered */ + /* Out-of-order packets delivered */ state->sack_delivered += pcount; /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ @@ -1686,7 +1695,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, num_sacks, prior_snd_una); if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; - tp->delivered++; /* A spurious retransmission is delivered */ + /* A spurious retransmission is delivered */ state->sack_delivered++; } @@ -1907,7 +1916,7 @@ static void tcp_add_reno_sack(struct sock *sk, int num_dupack, bool ece_ack) tcp_check_reno_reordering(sk, 0); delivered = tp->sacked_out - prior_sacked; if (delivered > 0) - tp->delivered += delivered; + tcp_count_delivered(tp, delivered, ece_ack); tcp_verify_left_out(tp); } } @@ -1920,7 +1929,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked, bool ece_ack) if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ - tp->delivered += max_t(int, acked - tp->sacked_out, 1); + tcp_count_delivered(tp, max_t(int, acked - tp->sacked_out, 1), + ece_ack); if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else @@ -3116,7 +3126,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (sacked & TCPCB_SACKED_ACKED) { tp->sacked_out -= acked_pcount; } else if (tcp_is_sack(tp)) { - tp->delivered += acked_pcount; + tcp_count_delivered(tp, acked_pcount, ece_ack); if (!tcp_skb_spurious_retrans(tp, skb)) tcp_rack_advance(tp, sacked, scb->end_seq, tcp_skb_timestamp_us(skb)); @@ -3562,10 +3572,9 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) delivered = tp->delivered - prior_delivered; NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered); - if (flag & FLAG_ECE) { - tp->delivered_ce += delivered; + if (flag & FLAG_ECE) NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered); - } + return delivered; } @@ -3665,6 +3674,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) ack_ev_flags |= CA_ACK_ECE; } + if (sack_state.sack_delivered) + tcp_count_delivered(tp, sack_state.sack_delivered, + flag & FLAG_ECE); + if (flag & FLAG_WIN_UPDATE) ack_ev_flags |= CA_ACK_WIN_UPDATE; -- cgit From 42deace2a54cac2ecb0fb3c38d98502942bd1167 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sun, 28 Jun 2020 21:53:35 +0200 Subject: net/hsr: fix hsr_dev_xmit()'s return type The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Fix this by returning 'netdev_tx_t' in this driver too. Signed-off-by: Luc Van Oostenryck Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 478852ef98ef..1032b83d7047 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -210,7 +210,7 @@ static netdev_features_t hsr_fix_features(struct net_device *dev, return hsr_features_recompute(hsr, features); } -static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); struct hsr_port *master; -- cgit From 433f17a93c52478e10d6541fb706fbcd0c6a6124 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sun, 28 Jun 2020 21:53:36 +0200 Subject: l2tp: fix l2tp_eth_dev_xmit()'s return type The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Fix this by returning 'netdev_tx_t' in this driver too. Signed-off-by: Luc Van Oostenryck Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index fd5ac2788e45..3099efa19249 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -73,7 +73,7 @@ static void l2tp_eth_dev_uninit(struct net_device *dev) */ } -static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); struct l2tp_session *session = priv->session; -- cgit From aebe4426ccaa4838f36ea805cdf7d76503e65117 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:25 +0300 Subject: net: sched: Pass root lock to Qdisc_ops.enqueue A following patch introduces qevents, points in qdisc algorithm where packet can be processed by user-defined filters. Should this processing lead to a situation where a new packet is to be enqueued on the same port, holding the root lock would lead to deadlocks. To solve the issue, qevent handler needs to unlock and relock the root lock when necessary. To that end, add the root lock argument to the qdisc op enqueue, and propagate throughout. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/sched/sch_atm.c | 4 ++-- net/sched/sch_blackhole.c | 2 +- net/sched/sch_cake.c | 2 +- net/sched/sch_cbq.c | 4 ++-- net/sched/sch_cbs.c | 18 +++++++++--------- net/sched/sch_choke.c | 2 +- net/sched/sch_codel.c | 2 +- net/sched/sch_drr.c | 4 ++-- net/sched/sch_dsmark.c | 4 ++-- net/sched/sch_etf.c | 2 +- net/sched/sch_ets.c | 4 ++-- net/sched/sch_fifo.c | 6 +++--- net/sched/sch_fq.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_generic.c | 4 ++-- net/sched/sch_gred.c | 2 +- net/sched/sch_hfsc.c | 6 +++--- net/sched/sch_hhf.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_multiq.c | 4 ++-- net/sched/sch_netem.c | 8 ++++---- net/sched/sch_pie.c | 2 +- net/sched/sch_plug.c | 2 +- net/sched/sch_prio.c | 6 +++--- net/sched/sch_qfq.c | 4 ++-- net/sched/sch_red.c | 4 ++-- net/sched/sch_sfb.c | 4 ++-- net/sched/sch_sfq.c | 2 +- net/sched/sch_skbprio.c | 2 +- net/sched/sch_taprio.c | 4 ++-- net/sched/sch_tbf.c | 10 +++++----- net/sched/sch_teql.c | 4 ++-- 34 files changed, 69 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3a46b86cbd67..c02bae927812 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK; qdisc_run(q); if (unlikely(to_free)) @@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ee12ca9f55b4..fb6b16c4e46d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -432,7 +432,7 @@ done: #endif } - ret = qdisc_enqueue(skb, flow->q, to_free); + ret = qdisc_enqueue(skb, flow->q, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { drop: __maybe_unused if (net_xmit_drop_count(ret)) { diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index a7f7667ae984..187644657c4f 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -13,7 +13,7 @@ #include #include -static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 65a95cb094e8..e9c502dd29a2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1687,7 +1687,7 @@ hash: static void cake_reconfigure(struct Qdisc *sch); -static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cake_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 39b427dc7512..052d4a1af69a 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) } static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return ret; } - ret = qdisc_enqueue(skb, cl->q, to_free); + ret = qdisc_enqueue(skb, cl->q, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; cbq_mark_toplevel(q, cl); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 2eaac2ff380f..7af15ebe07f7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -77,7 +77,7 @@ struct cbs_sched_data { s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; - int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; @@ -85,13 +85,13 @@ struct cbs_sched_data { }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct Qdisc *child, + struct Qdisc *child, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); int err; - err = child->ops->enqueue(skb, child, to_free); + err = child->ops->enqueue(skb, child, root_lock, to_free); if (err != NET_XMIT_SUCCESS) return err; @@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; - return cbs_child_enqueue(skb, sch, qdisc, to_free); + return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); } -static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); @@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, q->last = ktime_get_ns(); } - return cbs_child_enqueue(skb, sch, qdisc, to_free); + return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); } -static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); - return q->enqueue(skb, sch, to_free); + return q->enqueue(skb, sch, root_lock, to_free); } /* timediff is in ns, slope is in bytes/s */ diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index bd618b00d319..baf3faee31aa 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q, return choke_match_flow(oskb, nskb); } -static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 30169b3adbbb..1d94837abdd8 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) return skb; } -static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct codel_sched_data *q; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 07a2b0b35495..0d5c9a8ec61d 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, return NULL; } -static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 05605b30bef3..fbe49fffcdbb 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, } } - err = qdisc_enqueue(skb, p->q, to_free); + err = qdisc_enqueue(skb, p->q, root_lock, to_free); if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index c48f91075b5c..7a7c50a68115 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) } static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, - struct sk_buff **to_free) + spinlock_t *root_lock, struct sk_buff **to_free) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index a87e9159338c..373dc5855d4e 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, return &q->classes[band]; } -static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index a579a4131d22..b4da5b624ad8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -16,7 +16,7 @@ /* 1 band FIFO pseudo-"scheduler" */ -static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) @@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } -static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { if (likely(sch->q.qlen < sch->limit)) @@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } -static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int prev_backlog; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2fb76fc0cc31..a90d745c41e0 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); } -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 459a784056c0..6bf979f95509 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, return idx; } -static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_codel_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index fb760cee824e..a27a250ab8f9 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, skb->next = NULL; } -static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct fq_pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 265a61d011df..715cde1df9e4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off); cheaper. */ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); @@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, return &priv->q[band]; } -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, struct sk_buff **to_free) { int band = prio2band[skb->priority & TC_PRIO_MAX]; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8599c6f31b05..7d67c6cd6605 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table) return false; } -static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 433f2190960f..7f6670044f0a 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) return -1; } -static int -hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; @@ -1545,7 +1545,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 420ede875322..ddc6bf1d85d0 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -368,7 +368,7 @@ static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) return bucket - q->buckets; } -static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8184c87da8be..52fc513688b1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -576,7 +576,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) cl->prio_activity = 0; } -static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { int uninitialized_var(ret); @@ -599,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, + } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, root_lock, to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { qdisc_qstats_drop(sch); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1330ad224931..648611f5c105 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -57,7 +57,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } static int -multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct Qdisc *qdisc; @@ -74,7 +74,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } #endif - ret = qdisc_enqueue(skb, qdisc, to_free); + ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 84f82771cdf5..8fb17483a34f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -431,7 +431,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, * NET_XMIT_DROP: queue length didn't change. * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct netem_sched_data *q = qdisc_priv(sch); @@ -480,7 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); + rootq->enqueue(skb2, rootq, root_lock, to_free); q->duplicate = dupsave; rc_drop = NET_XMIT_SUCCESS; } @@ -604,7 +604,7 @@ finish_segs: skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; - rc = qdisc_enqueue(segs, sch, to_free); + rc = qdisc_enqueue(segs, sch, root_lock, to_free); if (rc != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(rc)) qdisc_qstats_drop(sch); @@ -720,7 +720,7 @@ deliver: struct sk_buff *to_free = NULL; int err; - err = qdisc_enqueue(skb, q->qdisc, &to_free); + err = qdisc_enqueue(skb, q->qdisc, NULL, &to_free); kfree_skb_list(to_free); if (err != NET_XMIT_SUCCESS && net_xmit_drop_count(err)) { diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index c65077f0c0f3..b305313b64e3 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -82,7 +82,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, } EXPORT_SYMBOL_GPL(pie_drop_early); -static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index cbc2ebca4548..e5f8b4769b4d 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -84,7 +84,7 @@ struct plug_sched_data { u32 pkts_to_release; }; -static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct plug_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 647941702f9f..a3e187f2603c 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -65,8 +65,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return q->queues[band]; } -static int -prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; @@ -83,7 +83,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } #endif - ret = qdisc_enqueue(skb, qdisc, to_free); + ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); if (ret == NET_XMIT_SUCCESS) { sch->qstats.backlog += len; sch->q.qlen++; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 0b05ac7c848e..ede854516825 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1194,7 +1194,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; @@ -1225,7 +1225,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, to_free); + err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 555a1b9e467f..6ace7d757e8b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -65,7 +65,7 @@ static int red_use_nodrop(struct red_sched_data *q) return q->flags & TC_RED_NODROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct red_sched_data *q = qdisc_priv(sch); @@ -118,7 +118,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } - ret = qdisc_enqueue(skb, child, to_free); + ret = qdisc_enqueue(skb, child, root_lock, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 4074c50ac3d7..d2a6e78262bb 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -276,7 +276,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, return false; } -static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { @@ -399,7 +399,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: - ret = qdisc_enqueue(skb, child, to_free); + ret = qdisc_enqueue(skb, child, root_lock, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 5a6def5e4e6d..46cdefd69e44 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q) } static int -sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash, dropped; diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 7a5e4c454715..f75f237c4436 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -65,7 +65,7 @@ static u16 calc_new_low_prio(const struct skbprio_sched_data *q) return SKBPRIO_MAX_PRIORITY - 1; } -static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index e981992634dd..daef2ff60a98 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -410,7 +410,7 @@ done: return txtime; } -static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); @@ -435,7 +435,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; - return qdisc_enqueue(skb, child, to_free); + return qdisc_enqueue(skb, child, root_lock, to_free); } static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 78e79029dc63..c3eb5cdb83a8 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -187,7 +187,7 @@ static int tbf_offload_dump(struct Qdisc *sch) /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ -static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -206,7 +206,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; len += segs->len; - ret = qdisc_enqueue(segs, q->qdisc, to_free); + ret = qdisc_enqueue(segs, q->qdisc, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); @@ -221,7 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } -static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, +static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -231,10 +231,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) - return tbf_segment(skb, sch, to_free); + return tbf_segment(skb, sch, root_lock, to_free); return qdisc_drop(skb, sch, to_free); } - ret = qdisc_enqueue(skb, q->qdisc, to_free); + ret = qdisc_enqueue(skb, q->qdisc, root_lock, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 689ef6f3ded8..511964653476 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -72,8 +72,8 @@ struct teql_sched_data { /* "teql*" qdisc routines */ -static int -teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) +static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); -- cgit From 3625750f05ecce21a0fce429c1ff85acfffb461b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:26 +0300 Subject: net: sched: Introduce helpers for qevent blocks Qevents are attach points for TC blocks, where filters can be put that are executed when "interesting events" take place in a qdisc. The data to keep and the functions to invoke to maintain a qevent will be largely the same between qevents. Therefore introduce sched-wide helpers for qevent management. Currently, similarly to ingress and egress blocks of clsact pseudo-qdisc, blocks attachment cannot be changed after the qdisc is created. To that end, add a helper tcf_qevent_validate_change(), which verifies whether block index attribute is not attached, or if it is, whether its value matches the current one (i.e. there is no material change). The function tcf_qevent_handle() should be invoked when qdisc hits the "interesting event" corresponding to a block. This function releases root lock for the duration of executing the attached filters, to allow packets generated through user actions (notably mirred) to be reinserted to the same qdisc tree. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/sched/cls_api.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5bfa6b985bb8..1b14d5f57e7f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3748,6 +3748,125 @@ unsigned int tcf_exts_num_actions(struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_num_actions); +#ifdef CONFIG_NET_CLS_ACT +static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr, + u32 *p_block_index, + struct netlink_ext_ack *extack) +{ + *p_block_index = nla_get_u32(block_index_attr); + if (!*p_block_index) { + NL_SET_ERR_MSG(extack, "Block number may not be zero"); + return -EINVAL; + } + + return 0; +} + +int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, + enum flow_block_binder_type binder_type, + struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + u32 block_index; + int err; + + if (!block_index_attr) + return 0; + + err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); + if (err) + return err; + + if (!block_index) + return 0; + + qe->info.binder_type = binder_type; + qe->info.chain_head_change = tcf_chain_head_change_dflt; + qe->info.chain_head_change_priv = &qe->filter_chain; + qe->info.block_index = block_index; + + return tcf_block_get_ext(&qe->block, sch, &qe->info, extack); +} +EXPORT_SYMBOL(tcf_qevent_init); + +void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch) +{ + if (qe->info.block_index) + tcf_block_put_ext(qe->block, sch, &qe->info); +} +EXPORT_SYMBOL(tcf_qevent_destroy); + +int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr, + struct netlink_ext_ack *extack) +{ + u32 block_index; + int err; + + if (!block_index_attr) + return 0; + + err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack); + if (err) + return err; + + /* Bounce newly-configured block or change in block. */ + if (block_index != qe->info.block_index) { + NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(tcf_qevent_validate_change); + +struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, + spinlock_t *root_lock, struct sk_buff **to_free, int *ret) +{ + struct tcf_result cl_res; + struct tcf_proto *fl; + + if (!qe->info.block_index) + return skb; + + fl = rcu_dereference_bh(qe->filter_chain); + + if (root_lock) + spin_unlock(root_lock); + + switch (tcf_classify(skb, fl, &cl_res, false)) { + case TC_ACT_SHOT: + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); + *ret = __NET_XMIT_BYPASS; + return NULL; + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + __qdisc_drop(skb, to_free); + *ret = __NET_XMIT_STOLEN; + return NULL; + case TC_ACT_REDIRECT: + skb_do_redirect(skb); + *ret = __NET_XMIT_STOLEN; + return NULL; + } + + if (root_lock) + spin_lock(root_lock); + + return skb; +} +EXPORT_SYMBOL(tcf_qevent_handle); + +int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe) +{ + if (!qe->info.block_index) + return 0; + return nla_put_u32(skb, attr_name, qe->info.block_index); +} +EXPORT_SYMBOL(tcf_qevent_dump); +#endif + static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); -- cgit From 65545ea24998bb9aab1ce713a67c693dc7a947ec Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:27 +0300 Subject: net: sched: sch_red: Split init and change callbacks In the following patches, RED will get two qevents. The implementation will be clearer if the callback for change is not a pure subset of the callback for init. Split the two and promote attribute parsing to the callbacks themselves from the common code, because it will be handy there. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/sched/sch_red.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6ace7d757e8b..225ce370e5a8 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -215,12 +215,11 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS), }; -static int red_change(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) +static int __red_change(struct Qdisc *sch, struct nlattr **tb, + struct netlink_ext_ack *extack) { struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_RED_MAX + 1]; struct nla_bitfield32 flags_bf; struct tc_red_qopt *ctl; unsigned char userbits; @@ -228,14 +227,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, int err; u32 max_P; - if (opt == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, - NULL); - if (err < 0) - return err; - if (tb[TCA_RED_PARMS] == NULL || tb[TCA_RED_STAB] == NULL) return -EINVAL; @@ -323,11 +314,38 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct red_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_RED_MAX + 1]; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + extack); + if (err < 0) + return err; q->qdisc = &noop_qdisc; q->sch = sch; timer_setup(&q->adapt_timer, red_adaptative_timer, 0); - return red_change(sch, opt, extack); + return __red_change(sch, tb, extack); +} + +static int red_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_RED_MAX + 1]; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, + extack); + if (err < 0) + return err; + + return __red_change(sch, tb, extack); } static int red_dump_offload_stats(struct Qdisc *sch) -- cgit From aee9caa03fc3c8b02f8f31824354d85f30e562e0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 27 Jun 2020 01:45:28 +0300 Subject: net: sched: sch_red: Add qevents "early_drop" and "mark" In order to allow acting on dropped and/or ECN-marked packets, add two new qevents to the RED qdisc: "early_drop" and "mark". Filters attached at "early_drop" block are executed as packets are early-dropped, those attached at the "mark" block are executed as packets are ECN-marked. Two new attributes are introduced: TCA_RED_EARLY_DROP_BLOCK with the block index for the "early_drop" qevent, and TCA_RED_MARK_BLOCK for the "mark" qevent. Absence of these attributes signifies "don't care": no block is allocated in that case, or the existing blocks are left intact in case of the change callback. For purposes of offloading, blocks attached to these qevents appear with newly-introduced binder types, FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP and FLOW_BLOCK_BINDER_TYPE_RED_MARK. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/sched/sch_red.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 225ce370e5a8..de2be4d04ed6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -46,6 +46,8 @@ struct red_sched_data { struct red_vars vars; struct red_stats stats; struct Qdisc *qdisc; + struct tcf_qevent qe_early_drop; + struct tcf_qevent qe_mark; }; #define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP) @@ -92,6 +94,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ if (INET_ECN_set_ce(skb)) { q->stats.prob_mark++; + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { q->stats.prob_drop++; goto congestion_drop; @@ -109,6 +114,9 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ if (INET_ECN_set_ce(skb)) { q->stats.forced_mark++; + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { q->stats.forced_drop++; goto congestion_drop; @@ -129,6 +137,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ return ret; congestion_drop: + skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock, to_free, &ret); + if (!skb) + return NET_XMIT_CN | ret; + qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; } @@ -202,6 +214,8 @@ static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); + tcf_qevent_destroy(&q->qe_mark, sch); + tcf_qevent_destroy(&q->qe_early_drop, sch); del_timer_sync(&q->adapt_timer); red_offload(sch, false); qdisc_put(q->qdisc); @@ -213,6 +227,8 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS), + [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 }, + [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 }, }; static int __red_change(struct Qdisc *sch, struct nlattr **tb, @@ -328,12 +344,38 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, q->qdisc = &noop_qdisc; q->sch = sch; timer_setup(&q->adapt_timer, red_adaptative_timer, 0); - return __red_change(sch, tb, extack); + + err = __red_change(sch, tb, extack); + if (err) + return err; + + err = tcf_qevent_init(&q->qe_early_drop, sch, + FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP, + tb[TCA_RED_EARLY_DROP_BLOCK], extack); + if (err) + goto err_early_drop_init; + + err = tcf_qevent_init(&q->qe_mark, sch, + FLOW_BLOCK_BINDER_TYPE_RED_MARK, + tb[TCA_RED_MARK_BLOCK], extack); + if (err) + goto err_mark_init; + + return 0; + +err_mark_init: + tcf_qevent_destroy(&q->qe_early_drop, sch); +err_early_drop_init: + del_timer_sync(&q->adapt_timer); + red_offload(sch, false); + qdisc_put(q->qdisc); + return err; } static int red_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; int err; @@ -345,6 +387,16 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; + err = tcf_qevent_validate_change(&q->qe_early_drop, + tb[TCA_RED_EARLY_DROP_BLOCK], extack); + if (err) + return err; + + err = tcf_qevent_validate_change(&q->qe_mark, + tb[TCA_RED_MARK_BLOCK], extack); + if (err) + return err; + return __red_change(sch, tb, extack); } @@ -389,7 +441,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || nla_put_bitfield32(skb, TCA_RED_FLAGS, - q->flags, TC_RED_SUPPORTED_FLAGS)) + q->flags, TC_RED_SUPPORTED_FLAGS) || + tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) || + tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit From e1ff9e82e2ea53d01540692a85c16a77e1089537 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 29 Jun 2020 22:26:20 +0200 Subject: net: mptcp: improve fallback to TCP Keep using MPTCP sockets and a use "dummy mapping" in case of fallback to regular TCP. When fallback is triggered, skip addition of the MPTCP option on send. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/11 Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/22 Co-developed-by: Paolo Abeni Signed-off-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 9 ++++- net/mptcp/protocol.c | 98 ++++++++++++++-------------------------------------- net/mptcp/protocol.h | 33 ++++++++++++++++++ net/mptcp/subflow.c | 47 ++++++++++++++++--------- 4 files changed, 98 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index df9a51425c6f..b96d3660562f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -624,6 +624,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, opts->suboptions = 0; + if (unlikely(mptcp_check_fallback(sk))) + return false; + if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, @@ -714,7 +717,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, */ if (!mp_opt->mp_capable) { subflow->mp_capable = 0; - tcp_sk(sk)->is_mptcp = 0; + pr_fallback(msk); + __mptcp_do_fallback(msk); return false; } @@ -814,6 +818,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; + if (__mptcp_check_fallback(msk)) + return; + mptcp_get_options(skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index be09fd525f8f..84ae96be9837 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -52,11 +52,6 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) return msk->subflow; } -static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk) -{ - return msk->first && !sk_is_mptcp(msk->first); -} - static struct socket *mptcp_is_tcpsk(struct sock *sk) { struct socket *sock = sk->sk_socket; @@ -94,7 +89,7 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) if (unlikely(sock)) return sock; - if (likely(!__mptcp_needs_tcp_fallback(msk))) + if (likely(!__mptcp_check_fallback(msk))) return NULL; return msk->subflow; @@ -133,6 +128,11 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) list_add(&subflow->node, &msk->conn_list); subflow->request_mptcp = 1; + /* accept() will wait on first subflow sk_wq, and we always wakes up + * via msk->sk_socket + */ + RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + set_state: if (state != MPTCP_SAME_STATE) inet_sk_state_store(sk, state); @@ -229,6 +229,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, if (!skb) break; + if (__mptcp_check_fallback(msk)) { + /* if we are running under the workqueue, TCP could have + * collapsed skbs between dummy map creation and now + * be sure to adjust the size + */ + map_remaining = skb->len; + subflow->map_data_len = skb->len; + } + offset = seq - TCP_SKB_CB(skb)->seq; fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (fin) { @@ -466,8 +475,15 @@ static void mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - u64 snd_una = atomic64_read(&msk->snd_una); bool cleaned = false; + u64 snd_una; + + /* on fallback we just need to ignore snd_una, as this is really + * plain TCP + */ + if (__mptcp_check_fallback(msk)) + atomic64_set(&msk->snd_una, msk->write_seq); + snd_una = atomic64_read(&msk->snd_una); list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) @@ -740,7 +756,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int mss_now = 0, size_goal = 0, ret = 0; struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied = 0; struct sock *ssk; bool tx_ok; @@ -759,15 +774,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } -fallback: - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) { - release_sock(sk); - pr_debug("fallback passthrough"); - ret = sock_sendmsg(ssock, msg); - return ret >= 0 ? ret + copied : (copied ? copied : ret); - } - pfrag = sk_page_frag(sk); restart: mptcp_clean_una(sk); @@ -819,17 +825,6 @@ wait_for_sndbuf: } break; } - if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { - /* Can happen for passive sockets: - * 3WHS negotiated MPTCP, but first packet after is - * plain TCP (e.g. due to middlebox filtering unknown - * options). - * - * Fall back to TCP. - */ - release_sock(ssk); - goto fallback; - } copied += ret; @@ -972,7 +967,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; int copied = 0; int target; long timeo; @@ -981,16 +975,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EOPNOTSUPP; lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) { -fallback: - release_sock(sk); - pr_debug("fallback-read subflow=%p", - mptcp_subflow_ctx(ssock->sk)); - copied = sock_recvmsg(ssock, msg, flags); - return copied; - } - timeo = sock_rcvtimeo(sk, nonblock); len = min_t(size_t, len, INT_MAX); @@ -1056,9 +1040,6 @@ fallback: pr_debug("block timeout %ld", timeo); mptcp_wait_data(sk, &timeo); - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) - goto fallback; } if (skb_queue_empty(&sk->sk_receive_queue)) { @@ -1335,8 +1316,6 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how, break; } - /* Wake up anyone sleeping in poll. */ - ssk->sk_state_change(ssk); release_sock(ssk); } @@ -1660,12 +1639,6 @@ void mptcp_finish_connect(struct sock *ssk) sk = subflow->conn; msk = mptcp_sk(sk); - if (!subflow->mp_capable) { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - return; - } - pr_debug("msk=%p, token=%u", sk, subflow->token); mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); @@ -1971,23 +1944,10 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct mptcp_sock *msk; - struct socket *ssock; __poll_t mask = 0; msk = mptcp_sk(sk); - lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); - if (!ssock) - ssock = __mptcp_nmpc_socket(msk); - if (ssock) { - mask = ssock->ops->poll(file, ssock, wait); - release_sock(sk); - return mask; - } - - release_sock(sk); sock_poll_wait(file, sock, wait); - lock_sock(sk); if (test_bit(MPTCP_DATA_READY, &msk->flags)) mask = EPOLLIN | EPOLLRDNORM; @@ -1997,8 +1957,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - release_sock(sk); - return mask; } @@ -2006,18 +1964,11 @@ static int mptcp_shutdown(struct socket *sock, int how) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; - struct socket *ssock; int ret = 0; pr_debug("sk=%p, how=%d", msk, how); lock_sock(sock->sk); - ssock = __mptcp_tcp_fallback(msk); - if (ssock) { - release_sock(sock->sk); - return inet_shutdown(ssock, how); - } - if (how == SHUT_WR || how == SHUT_RDWR) inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); @@ -2043,6 +1994,9 @@ static int mptcp_shutdown(struct socket *sock, int how) mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq); } + /* Wake up anyone sleeping in poll. */ + sock->sk->sk_state_change(sock->sk); + out_unlock: release_sock(sock->sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c05552e5fa23..a709df659ae0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -89,6 +89,7 @@ #define MPTCP_SEND_SPACE 1 #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +#define MPTCP_FALLBACK_DONE 4 struct mptcp_options_received { u64 sndr_key; @@ -457,4 +458,36 @@ static inline bool before64(__u64 seq1, __u64 seq2) void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); +static inline bool __mptcp_check_fallback(struct mptcp_sock *msk) +{ + return test_bit(MPTCP_FALLBACK_DONE, &msk->flags); +} + +static inline bool mptcp_check_fallback(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + return __mptcp_check_fallback(msk); +} + +static inline void __mptcp_do_fallback(struct mptcp_sock *msk) +{ + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) { + pr_debug("TCP fallback already done (msk=%p)", msk); + return; + } + set_bit(MPTCP_FALLBACK_DONE, &msk->flags); +} + +static inline void mptcp_do_fallback(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + __mptcp_do_fallback(msk); +} + +#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) + #endif /* __MPTCP_PROTOCOL_H */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 102db8c88e97..cb8a42ff4646 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -216,7 +216,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; - struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -230,6 +229,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; subflow->conn_finished = 1; + subflow->ssn_offset = TCP_SKB_CB(skb)->seq; + pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp && mp_opt.mp_capable) { @@ -245,21 +246,20 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); } else { - tp->is_mptcp = 0; + if (subflow->request_mptcp) + MPTCP_INC_STATS(sock_net(sk), + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); + mptcp_do_fallback(sk); + pr_fallback(mptcp_sk(subflow->conn)); } - if (!tp->is_mptcp) + if (mptcp_check_fallback(sk)) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - - if (skb) { - pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); - subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - } } else if (subflow->mp_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -279,9 +279,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); - if (skb) - subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - if (!mptcp_finish_join(sk)) goto do_reset; @@ -557,7 +554,8 @@ enum mapping_status { MAPPING_OK, MAPPING_INVALID, MAPPING_EMPTY, - MAPPING_DATA_FIN + MAPPING_DATA_FIN, + MAPPING_DUMMY }; static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) @@ -621,6 +619,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (!skb) return MAPPING_EMPTY; + if (mptcp_check_fallback(ssk)) + return MAPPING_DUMMY; + mpext = mptcp_get_ext(skb); if (!mpext || !mpext->use_map) { if (!subflow->map_valid && !skb->len) { @@ -762,6 +763,16 @@ static bool subflow_check_data_avail(struct sock *ssk) ssk->sk_err = EBADMSG; goto fatal; } + if (status == MAPPING_DUMMY) { + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - + subflow->ssn_offset; + return true; + } if (status != MAPPING_OK) return false; @@ -885,14 +896,18 @@ static void subflow_data_ready(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; - if (!subflow->mp_capable && !subflow->mp_join) { - subflow->tcp_data_ready(sk); - + msk = mptcp_sk(parent); + if (inet_sk_state_load(sk) == TCP_LISTEN) { + set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; } + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join); + if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); } @@ -1117,7 +1132,7 @@ static void subflow_state_change(struct sock *sk) * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. */ - if (subflow->mp_capable && mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); if (!(parent->sk_shutdown & RCV_SHUTDOWN) && -- cgit From 8fd738049ac3d67a937d36577763b47180aae1ad Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 29 Jun 2020 22:26:21 +0200 Subject: mptcp: fallback in case of simultaneous connect when a MPTCP client tries to connect to itself, tcp_finish_connect() is never reached. Because of this, depending on the socket current state, multiple faulty behaviours can be observed: 1) a WARN_ON() in subflow_data_ready() is hit WARNING: CPU: 2 PID: 882 at net/mptcp/subflow.c:911 subflow_data_ready+0x18b/0x230 [...] CPU: 2 PID: 882 Comm: gh35 Not tainted 5.7.0+ #187 [...] RIP: 0010:subflow_data_ready+0x18b/0x230 [...] Call Trace: tcp_data_queue+0xd2f/0x4250 tcp_rcv_state_process+0xb1c/0x49d3 tcp_v4_do_rcv+0x2bc/0x790 __release_sock+0x153/0x2d0 release_sock+0x4f/0x170 mptcp_shutdown+0x167/0x4e0 __sys_shutdown+0xe6/0x180 __x64_sys_shutdown+0x50/0x70 do_syscall_64+0x9a/0x370 entry_SYSCALL_64_after_hwframe+0x44/0xa9 2) client is stuck forever in mptcp_sendmsg() because the socket is not TCP_ESTABLISHED crash> bt 4847 PID: 4847 TASK: ffff88814b2fb100 CPU: 1 COMMAND: "gh35" #0 [ffff8881376ff680] __schedule at ffffffff97248da4 #1 [ffff8881376ff778] schedule at ffffffff9724a34f #2 [ffff8881376ff7a0] schedule_timeout at ffffffff97252ba0 #3 [ffff8881376ff8a8] wait_woken at ffffffff958ab4ba #4 [ffff8881376ff940] sk_stream_wait_connect at ffffffff96c2d859 #5 [ffff8881376ffa28] mptcp_sendmsg at ffffffff97207fca #6 [ffff8881376ffbc0] sock_sendmsg at ffffffff96be1b5b #7 [ffff8881376ffbe8] sock_write_iter at ffffffff96be1daa #8 [ffff8881376ffce8] new_sync_write at ffffffff95e5cb52 #9 [ffff8881376ffe50] vfs_write at ffffffff95e6547f #10 [ffff8881376ffe90] ksys_write at ffffffff95e65d26 #11 [ffff8881376fff28] do_syscall_64 at ffffffff956088ba #12 [ffff8881376fff50] entry_SYSCALL_64_after_hwframe at ffffffff9740008c RIP: 00007f126f6956ed RSP: 00007ffc2a320278 RFLAGS: 00000217 RAX: ffffffffffffffda RBX: 0000000020000044 RCX: 00007f126f6956ed RDX: 0000000000000004 RSI: 00000000004007b8 RDI: 0000000000000003 RBP: 00007ffc2a3202a0 R8: 0000000000400720 R9: 0000000000400720 R10: 0000000000400720 R11: 0000000000000217 R12: 00000000004004b0 R13: 00007ffc2a320380 R14: 0000000000000000 R15: 0000000000000000 ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b 3) tcpdump captures show that DSS is exchanged even when MP_CAPABLE handshake didn't complete. $ tcpdump -tnnr bad.pcap IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [S], seq 3208913911, win 65483, options [mss 65495,sackOK,TS val 3291706876 ecr 3291694721,nop,wscale 7,mptcp capable v1], length 0 IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [S.], seq 3208913911, ack 3208913912, win 65483, options [mss 65495,sackOK,TS val 3291706876 ecr 3291706876,nop,wscale 7,mptcp capable v1], length 0 IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [.], ack 1, win 512, options [nop,nop,TS val 3291706876 ecr 3291706876], length 0 IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [F.], seq 1, ack 1, win 512, options [nop,nop,TS val 3291707876 ecr 3291706876,mptcp dss fin seq 0 subseq 0 len 1,nop,nop], length 0 IP 127.0.0.1.20000 > 127.0.0.1.20000: Flags [.], ack 2, win 512, options [nop,nop,TS val 3291707876 ecr 3291707876], length 0 force a fallback to TCP in these cases, and adjust the main socket state to avoid hanging in mptcp_sendmsg(). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/35 Reported-by: Christoph Paasch Suggested-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 10 ++++++++++ net/mptcp/subflow.c | 10 ++++++++++ 2 files changed, 20 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a709df659ae0..1d05d9841b5c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -490,4 +490,14 @@ static inline void mptcp_do_fallback(struct sock *sk) #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) +static inline bool subflow_simultaneous_connect(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct sock *parent = subflow->conn; + + return sk->sk_state == TCP_ESTABLISHED && + !mptcp_sk(parent)->pm.server_side && + !subflow->conn_finished; +} + #endif /* __MPTCP_PROTOCOL_H */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index cb8a42ff4646..548f9e347ff5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1128,6 +1128,16 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); + if (subflow_simultaneous_connect(sk)) { + mptcp_do_fallback(sk); + pr_fallback(mptcp_sk(parent)); + subflow->conn_finished = 1; + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { + inet_sk_state_store(parent, TCP_ESTABLISHED); + parent->sk_state_change(parent); + } + } + /* as recvmsg() does not acquire the subflow socket for ssk selection * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. -- cgit From d2f77c53342e3e13ce1fd7f9638ee200026e14f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Jun 2020 22:26:22 +0200 Subject: mptcp: check for plain TCP sock at accept time This cleanup the code a bit and avoid corrupted states on weird syscall sequence (accept(), connect()). Signed-off-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 69 ++++++---------------------------------------------- 1 file changed, 7 insertions(+), 62 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 84ae96be9837..dbeb6fe374f5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -52,13 +52,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) return msk->subflow; } -static struct socket *mptcp_is_tcpsk(struct sock *sk) +static bool mptcp_is_tcpsk(struct sock *sk) { struct socket *sock = sk->sk_socket; - if (sock->sk != sk) - return NULL; - if (unlikely(sk->sk_prot == &tcp_prot)) { /* we are being invoked after mptcp_accept() has * accepted a non-mp-capable flow: sk is a tcp_sk, @@ -68,27 +65,21 @@ static struct socket *mptcp_is_tcpsk(struct sock *sk) * bypass mptcp. */ sock->ops = &inet_stream_ops; - return sock; + return true; #if IS_ENABLED(CONFIG_MPTCP_IPV6) } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { sock->ops = &inet6_stream_ops; - return sock; + return true; #endif } - return NULL; + return false; } static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) { - struct socket *sock; - sock_owned_by_me((const struct sock *)msk); - sock = mptcp_is_tcpsk((struct sock *)msk); - if (unlikely(sock)) - return sock; - if (likely(!__mptcp_check_fallback(msk))) return NULL; @@ -1466,7 +1457,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, return NULL; pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk)); - if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; @@ -1821,42 +1811,6 @@ unlock: return err; } -static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - if (sock->sk->sk_prot == &tcp_prot) { - /* we are being invoked from __sys_accept4, after - * mptcp_accept() has just accepted a non-mp-capable - * flow: sk is a tcp_sk, not an mptcp one. - * - * Hand the socket over to tcp so all further socket ops - * bypass mptcp. - */ - sock->ops = &inet_stream_ops; - } - - return inet_getname(sock, uaddr, peer); -} - -#if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - if (sock->sk->sk_prot == &tcpv6_prot) { - /* we are being invoked from __sys_accept4 after - * mptcp_accept() has accepted a non-mp-capable - * subflow: sk is a tcp_sk, not mptcp. - * - * Hand the socket over to tcp so all further - * socket ops bypass mptcp. - */ - sock->ops = &inet6_stream_ops; - } - - return inet6_getname(sock, uaddr, peer); -} -#endif - static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk = mptcp_sk(sock->sk); @@ -1885,15 +1839,6 @@ unlock: return err; } -static bool is_tcp_proto(const struct proto *p) -{ -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - return p == &tcp_prot || p == &tcpv6_prot; -#else - return p == &tcp_prot; -#endif -} - static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { @@ -1915,7 +1860,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, release_sock(sock->sk); err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) { + if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { struct mptcp_sock *msk = mptcp_sk(newsock->sk); struct mptcp_subflow_context *subflow; @@ -2011,7 +1956,7 @@ static const struct proto_ops mptcp_stream_ops = { .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, - .getname = mptcp_v4_getname, + .getname = inet_getname, .poll = mptcp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, @@ -2065,7 +2010,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, - .getname = mptcp_v6_getname, + .getname = inet6_getname, .poll = mptcp_poll, .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, -- cgit From fa68018dc45e3faee9d866d5dc484d141e8f1093 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Jun 2020 22:26:23 +0200 Subject: mptcp: create first subflow at msk creation time This cleans the code a bit and makes the behavior more consistent. Signed-off-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 53 ++++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dbeb6fe374f5..ad619bda71cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -86,32 +86,16 @@ static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) return msk->subflow; } -static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) -{ - return !msk->first; -} - -static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) +static int __mptcp_socket_create(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; struct socket *ssock; int err; - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) - return ssock; - - ssock = __mptcp_nmpc_socket(msk); - if (ssock) - goto set_state; - - if (!__mptcp_can_create_subflow(msk)) - return ERR_PTR(-EINVAL); - err = mptcp_subflow_create_socket(sk, &ssock); if (err) - return ERR_PTR(err); + return err; msk->first = ssock->sk; msk->subflow = ssock; @@ -124,10 +108,7 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) */ RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); -set_state: - if (state != MPTCP_SAME_STATE) - inet_sk_state_store(sk, state); - return ssock; + return 0; } static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -1255,6 +1236,10 @@ static int mptcp_init_sock(struct sock *sk) if (ret) return ret; + ret = __mptcp_socket_create(mptcp_sk(sk)); + if (ret) + return ret; + sk_sockets_allocated_inc(sk); sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2]; @@ -1744,9 +1729,9 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int err; lock_sock(sock->sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } @@ -1776,13 +1761,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto do_connect; } - mptcp_token_destroy(msk); - ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } + mptcp_token_destroy(msk); + inet_sk_state_store(sock->sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssock->sk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -1820,13 +1806,14 @@ static int mptcp_listen(struct socket *sock, int backlog) pr_debug("msk=%p", msk); lock_sock(sock->sk); - mptcp_token_destroy(msk); - ssock = __mptcp_socket_create(msk, TCP_LISTEN); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } + mptcp_token_destroy(msk); + inet_sk_state_store(sock->sk, TCP_LISTEN); sock_set_flag(sock->sk, SOCK_RCU_FREE); err = ssock->ops->listen(ssock, backlog); -- cgit From 76660afbb7a1ac6bef0be34c4c6e76d7e07b74d7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Jun 2020 22:26:24 +0200 Subject: mptcp: __mptcp_tcp_fallback() returns a struct sock Currently __mptcp_tcp_fallback() always return NULL on incoming connections, because MPTCP does not create the additional socket for the first subflow. Since the previous commit no __mptcp_tcp_fallback() caller needs a struct socket, so let __mptcp_tcp_fallback() return the first subflow sock and cope correctly even with incoming connections. Signed-off-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ad619bda71cc..f2b2bd37e371 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -76,14 +76,14 @@ static bool mptcp_is_tcpsk(struct sock *sk) return false; } -static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) +static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { sock_owned_by_me((const struct sock *)msk); if (likely(!__mptcp_check_fallback(msk))) return NULL; - return msk->subflow; + return msk->first; } static int __mptcp_socket_create(struct mptcp_sock *msk) @@ -1498,7 +1498,7 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -1509,11 +1509,10 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); + ssk = __mptcp_tcp_fallback(msk); release_sock(sk); - if (ssock) - return tcp_setsockopt(ssock->sk, level, optname, optval, - optlen); + if (ssk) + return tcp_setsockopt(ssk, level, optname, optval, optlen); return -EOPNOTSUPP; } @@ -1522,7 +1521,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -1533,11 +1532,10 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); + ssk = __mptcp_tcp_fallback(msk); release_sock(sk); - if (ssock) - return tcp_getsockopt(ssock->sk, level, optname, optval, - option); + if (ssk) + return tcp_getsockopt(ssk, level, optname, optval, option); return -EOPNOTSUPP; } -- cgit From 8a05661b2b266b6dc45af255b3037b00ef31d85d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Jun 2020 22:26:25 +0200 Subject: mptcp: close poll() races mptcp_poll always return POLLOUT for unblocking connect(), ensure that the socket is a suitable state. The MPTCP_DATA_READY bit is never cleared on accept: ensure we don't leave mptcp_accept() with an empty accept queue and such bit set. Signed-off-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f2b2bd37e371..28ec26d97f96 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1841,6 +1841,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) goto unlock_fail; + clear_bit(MPTCP_DATA_READY, &msk->flags); sock_hold(ssock->sk); release_sock(sock->sk); @@ -1861,6 +1862,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, } } + if (inet_csk_listen_poll(ssock->sk)) + set_bit(MPTCP_DATA_READY, &msk->flags); sock_put(ssock->sk); return err; @@ -1869,21 +1872,33 @@ unlock_fail: return -EINVAL; } +static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +{ + return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : + 0; +} + static __poll_t mptcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { struct sock *sk = sock->sk; struct mptcp_sock *msk; __poll_t mask = 0; + int state; msk = mptcp_sk(sk); sock_poll_wait(file, sock, wait); - if (test_bit(MPTCP_DATA_READY, &msk->flags)) - mask = EPOLLIN | EPOLLRDNORM; - if (sk_stream_is_writeable(sk) && - test_bit(MPTCP_SEND_SPACE, &msk->flags)) - mask |= EPOLLOUT | EPOLLWRNORM; + state = inet_sk_state_load(sk); + if (state == TCP_LISTEN) + return mptcp_check_readable(msk); + + if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { + mask |= mptcp_check_readable(msk); + if (sk_stream_is_writeable(sk) && + test_bit(MPTCP_SEND_SPACE, &msk->flags)) + mask |= EPOLLOUT | EPOLLWRNORM; + } if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; -- cgit From 5f035af76e51cd622abc6564d5512ffeb9e06917 Mon Sep 17 00:00:00 2001 From: Po Liu Date: Mon, 29 Jun 2020 14:54:16 +0800 Subject: net:qos: police action offloading parameter 'burst' change to the original value Since 'tcfp_burst' with TICK factor, driver side always need to recover it to the original value, this patch moves the generic calculation and recover to the 'burst' original value before offloading to device driver. Signed-off-by: Po Liu Acked-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1b14d5f57e7f..e9e119ea6813 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3660,7 +3660,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; - entry->police.burst = tcf_police_tcfp_burst(act); + entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); -- cgit From ecc31c60240b9808a274befc5db6b8a249a6ade1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 29 Jun 2020 23:46:16 +0300 Subject: ethtool: Add link extended state Currently, drivers can only tell whether the link is up/down using LINKSTATE_GET, but no additional information is given. Add attributes to LINKSTATE_GET command in order to allow drivers to expose the user more information in addition to link state to ease the debug process, for example, reason for link down state. Extended state consists of two attributes - link_ext_state and link_ext_substate. The idea is to avoid 'vendor specific' states in order to prevent drivers to use specific link_ext_state that can be in the future common link_ext_state. The substates allows drivers to add more information to the common link_ext_state. For example, vendor can expose 'Autoneg' as link_ext_state and add 'No partner detected during force mode' as link_ext_substate. If a driver cannot pinpoint the extended state with the substate accuracy, it is free to expose only the extended state and omit the substate attribute. Signed-off-by: Amit Cohen Reviewed-by: Jiri Pirko Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ethtool/linkstate.c | 52 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index afe5ac8a0f00..4834091ec24c 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -9,10 +9,12 @@ struct linkstate_req_info { }; struct linkstate_reply_data { - struct ethnl_reply_data base; - int link; - int sqi; - int sqi_max; + struct ethnl_reply_data base; + int link; + int sqi; + int sqi_max; + bool link_ext_state_provided; + struct ethtool_link_ext_state_info ethtool_link_ext_state_info; }; #define LINKSTATE_REPDATA(__reply_base) \ @@ -25,6 +27,8 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .type = NLA_REJECT }, }; static int linkstate_get_sqi(struct net_device *dev) @@ -61,6 +65,23 @@ static int linkstate_get_sqi_max(struct net_device *dev) mutex_unlock(&phydev->lock); return ret; +}; + +static int linkstate_get_link_ext_state(struct net_device *dev, + struct linkstate_reply_data *data) +{ + int err; + + if (!dev->ethtool_ops->get_link_ext_state) + return -EOPNOTSUPP; + + err = dev->ethtool_ops->get_link_ext_state(dev, &data->ethtool_link_ext_state_info); + if (err) + return err; + + data->link_ext_state_provided = true; + + return 0; } static int linkstate_prepare_data(const struct ethnl_req_info *req_base, @@ -86,6 +107,12 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, goto out; data->sqi_max = ret; + if (dev->flags & IFF_UP) { + ret = linkstate_get_link_ext_state(dev, data); + if (ret < 0 && ret != -EOPNOTSUPP && ret != -ENODATA) + goto out; + } + ret = 0; out: ethnl_ops_complete(dev); @@ -107,6 +134,12 @@ static int linkstate_reply_size(const struct ethnl_req_info *req_base, if (data->sqi_max != -EOPNOTSUPP) len += nla_total_size(sizeof(u32)); + if (data->link_ext_state_provided) + len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_STATE */ + + if (data->ethtool_link_ext_state_info.__link_ext_substate) + len += nla_total_size(sizeof(u8)); /* LINKSTATE_EXT_SUBSTATE */ + return len; } @@ -128,6 +161,17 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) return -EMSGSIZE; + if (data->link_ext_state_provided) { + if (nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_STATE, + data->ethtool_link_ext_state_info.link_ext_state)) + return -EMSGSIZE; + + if (data->ethtool_link_ext_state_info.__link_ext_substate && + nla_put_u8(skb, ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, + data->ethtool_link_ext_state_info.__link_ext_substate)) + return -EMSGSIZE; + } + return 0; } -- cgit From f53b9b0bdc59c0823679f2e3214e0d538f5951b9 Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Sun, 31 May 2020 22:26:23 +0200 Subject: netfilter: introduce support for reject at prerouting stage REJECT statement can be only used in INPUT, FORWARD and OUTPUT chains. This patch adds support of REJECT, both icmp and tcp reset, at PREROUTING stage. The need for this patch comes from the requirement of some forwarding devices to reject traffic before the natting and routing decisions. The main use case is to be able to send a graceful termination to legitimate clients that, under any circumstances, the NATed endpoints are not available. This option allows clients to decide either to perform a reconnection or manage the error in their side, instead of just dropping the connection and let them die due to timeout. It is supported ipv4, ipv6 and inet families for nft infrastructure. Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 21 +++++++++++++++++++++ net/ipv6/netfilter/nf_reject_ipv6.c | 26 ++++++++++++++++++++++++++ net/netfilter/nft_reject.c | 3 ++- 3 files changed, 49 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 2361fdac2c43..9dcfa4e461b6 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -96,6 +96,21 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, } EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); +static int nf_reject_fill_skb_dst(struct sk_buff *skb_in) +{ + struct dst_entry *dst = NULL; + struct flowi fl; + + memset(&fl, 0, sizeof(struct flowi)); + fl.u.ip4.daddr = ip_hdr(skb_in)->saddr; + nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false); + if (!dst) + return -1; + + skb_dst_set(skb_in, dst); + return 0; +} + /* Send RST reply */ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { @@ -109,6 +124,9 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (!oth) return; + if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(oldskb)) + return; + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) return; @@ -175,6 +193,9 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) if (iph->frag_off & htons(IP_OFFSET)) return; + if (hook == NF_INET_PRE_ROUTING && nf_reject_fill_skb_dst(skb_in)) + return; + if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) { icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); return; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 5fae66f66671..4aef6baaa55e 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -126,6 +126,21 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, } EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); +static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) +{ + struct dst_entry *dst = NULL; + struct flowi fl; + + memset(&fl, 0, sizeof(struct flowi)); + fl.u.ip6.daddr = ipv6_hdr(skb_in)->saddr; + nf_ip6_route(dev_net(skb_in->dev), &dst, &fl, false); + if (!dst) + return -1; + + skb_dst_set(skb_in, dst); + return 0; +} + void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) { struct net_device *br_indev __maybe_unused; @@ -154,6 +169,14 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) fl6.daddr = oip6h->saddr; fl6.fl6_sport = otcph->dest; fl6.fl6_dport = otcph->source; + + if (hook == NF_INET_PRE_ROUTING) { + nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); + if (!dst) + return; + skb_dst_set(oldskb, dst); + } + fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); @@ -245,6 +268,9 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; + if (hooknum == NF_INET_PRE_ROUTING && nf_reject6_fill_skb_dst(skb_in)) + return; + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); } EXPORT_SYMBOL_GPL(nf_send_unreach6); diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 86eafbb0fdd0..61fb7e8afbf0 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -30,7 +30,8 @@ int nft_reject_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | - (1 << NF_INET_LOCAL_OUT)); + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_PRE_ROUTING)); } EXPORT_SYMBOL_GPL(nft_reject_validate); -- cgit From d61d2e902aa0561e5f4b6348514fb35de544aa1f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sun, 14 Jun 2020 23:42:07 +0200 Subject: netfilter: nft_set_pipapo: Drop useless assignment of scratch map index on insert In nft_pipapo_insert(), we need to reallocate scratch maps that will be used for matching by lookup functions, if they have never been allocated or if the bucket size changes as a result of the insertion. As pipapo_realloc_scratch() provides a pair of fresh, zeroed out maps, there's no need to select a particular one after reallocation. Other than being useless, the existing assignment was also troubled by the fact that the index was set only on the CPU performing the actual insertion, as spotted by Florian. Simply drop the assignment. Reported-by: Florian Westphal Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 8c04388296b0..313de1d73168 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1249,8 +1249,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (err) return err; - this_cpu_write(nft_pipapo_scratch_index, false); - m->bsize_max = bsize_max; } else { put_cpu_ptr(m->scratch); -- cgit From 857ca89711de3dbcc674d58a6d7d297ee0bd34e1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 21 Jun 2020 18:40:30 +0300 Subject: ipvs: register hooks only with services Keep the IPVS hooks registered in Netfilter only while there are configured virtual services. This saves CPU cycles while IPVS is loaded but not used. Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 80 ++++++++++++++++++++++++++++++++--------- net/netfilter/ipvs/ip_vs_ctl.c | 23 ++++++++++-- 2 files changed, 84 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index aa6a603a2425..ca3670152565 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2256,7 +2256,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, #endif -static const struct nf_hook_ops ip_vs_ops[] = { +static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, @@ -2302,7 +2302,10 @@ static const struct nf_hook_ops ip_vs_ops[] = { .hooknum = NF_INET_FORWARD, .priority = 100, }, +}; + #ifdef CONFIG_IP_VS_IPV6 +static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, @@ -2348,8 +2351,64 @@ static const struct nf_hook_ops ip_vs_ops[] = { .hooknum = NF_INET_FORWARD, .priority = 100, }, -#endif }; +#endif + +int ip_vs_register_hooks(struct netns_ipvs *ipvs, unsigned int af) +{ + const struct nf_hook_ops *ops; + unsigned int count; + unsigned int afmask; + int ret = 0; + + if (af == AF_INET6) { +#ifdef CONFIG_IP_VS_IPV6 + ops = ip_vs_ops6; + count = ARRAY_SIZE(ip_vs_ops6); + afmask = 2; +#else + return -EINVAL; +#endif + } else { + ops = ip_vs_ops4; + count = ARRAY_SIZE(ip_vs_ops4); + afmask = 1; + } + + if (!(ipvs->hooks_afmask & afmask)) { + ret = nf_register_net_hooks(ipvs->net, ops, count); + if (ret >= 0) + ipvs->hooks_afmask |= afmask; + } + return ret; +} + +void ip_vs_unregister_hooks(struct netns_ipvs *ipvs, unsigned int af) +{ + const struct nf_hook_ops *ops; + unsigned int count; + unsigned int afmask; + + if (af == AF_INET6) { +#ifdef CONFIG_IP_VS_IPV6 + ops = ip_vs_ops6; + count = ARRAY_SIZE(ip_vs_ops6); + afmask = 2; +#else + return; +#endif + } else { + ops = ip_vs_ops4; + count = ARRAY_SIZE(ip_vs_ops4); + afmask = 1; + } + + if (ipvs->hooks_afmask & afmask) { + nf_unregister_net_hooks(ipvs->net, ops, count); + ipvs->hooks_afmask &= ~afmask; + } +} + /* * Initialize IP Virtual Server netns mem. */ @@ -2425,19 +2484,6 @@ static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list) } } -static int __net_init __ip_vs_dev_init(struct net *net) -{ - int ret; - - ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - if (ret < 0) - goto hook_fail; - return 0; - -hook_fail: - return ret; -} - static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) { struct netns_ipvs *ipvs; @@ -2446,7 +2492,8 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list) EnterFunction(2); list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ip_vs_unregister_hooks(ipvs, AF_INET); + ip_vs_unregister_hooks(ipvs, AF_INET6); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs); @@ -2462,7 +2509,6 @@ static struct pernet_operations ipvs_core_ops = { }; static struct pernet_operations ipvs_core_dev_ops = { - .init = __ip_vs_dev_init, .exit_batch = __ip_vs_dev_cleanup_batch, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 412656c34f20..0eed388c960b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1272,6 +1272,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + int ret_hooks = -1; /* increase the module use count */ if (!ip_vs_use_count_inc()) @@ -1313,6 +1314,14 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } #endif + if ((u->af == AF_INET && !ipvs->num_services) || + (u->af == AF_INET6 && !ipvs->num_services6)) { + ret = ip_vs_register_hooks(ipvs, u->af); + if (ret < 0) + goto out_err; + ret_hooks = ret; + } + svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); if (svc == NULL) { IP_VS_DBG(1, "%s(): no memory\n", __func__); @@ -1374,6 +1383,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) ipvs->num_services++; + else if (svc->af == AF_INET6) + ipvs->num_services6++; /* Hash the service into the service table */ ip_vs_svc_hash(svc); @@ -1385,6 +1396,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, out_err: + if (ret_hooks >= 0) + ip_vs_unregister_hooks(ipvs, u->af); if (svc != NULL) { ip_vs_unbind_scheduler(svc, sched); ip_vs_service_free(svc); @@ -1500,9 +1513,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_pe *old_pe; struct netns_ipvs *ipvs = svc->ipvs; - /* Count only IPv4 services for old get/setsockopt interface */ - if (svc->af == AF_INET) + if (svc->af == AF_INET) { ipvs->num_services--; + if (!ipvs->num_services) + ip_vs_unregister_hooks(ipvs, svc->af); + } else if (svc->af == AF_INET6) { + ipvs->num_services6--; + if (!ipvs->num_services6) + ip_vs_unregister_hooks(ipvs, svc->af); + } ip_vs_stop_estimator(svc->ipvs, &svc->stats); -- cgit From 65951a9eb65e79b5a056fa6912682d91136507a1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 29 Jun 2020 21:43:13 -0700 Subject: net: dsa: Improve subordinate PHY error message It is not very informative to know the DSA master device when a subordinate network device fails to get its PHY setup. Provide the device name and capitalize PHY while we are it. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4c7f086a047b..e147e10b411c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1795,7 +1795,8 @@ int dsa_slave_create(struct dsa_port *port) ret = dsa_slave_phy_setup(slave_dev); if (ret) { - netdev_err(master, "error %d setting up slave phy\n", ret); + netdev_err(master, "error %d setting up slave PHY for %s\n", + ret, slave_dev->name); goto out_gcells; } -- cgit From 6bad912b7e5ab51c23d8fa8362ca2d4ceeebdb74 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jun 2020 16:38:26 +0200 Subject: mptcp: do nonce initialization at subflow creation time This clean-up the code a bit, reduces the number of used hooks and indirect call requested, and allow better error reporting from __mptcp_subflow_connect() Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 54 ++++++++++++++++++++--------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 548f9e347ff5..664aa9158363 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -29,34 +29,6 @@ static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, MPTCP_INC_STATS(sock_net(req_to_sk(req)), field); } -static int subflow_rebuild_header(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - int local_id; - - if (subflow->request_join && !subflow->local_nonce) { - struct mptcp_sock *msk = (struct mptcp_sock *)subflow->conn; - - pr_debug("subflow=%p", sk); - - do { - get_random_bytes(&subflow->local_nonce, sizeof(u32)); - } while (!subflow->local_nonce); - - if (subflow->local_id) - goto out; - - local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); - if (local_id < 0) - return -EINVAL; - - subflow->local_id = local_id; - } - -out: - return subflow->icsk_af_ops->rebuild_header(sk); -} - static void subflow_req_destructor(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -984,7 +956,9 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; struct sockaddr_storage addr; + int local_id = loc->id; struct socket *sf; + struct sock *ssk; u32 remote_token; int addrlen; int err; @@ -996,7 +970,20 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, if (err) return err; - subflow = mptcp_subflow_ctx(sf->sk); + ssk = sf->sk; + subflow = mptcp_subflow_ctx(ssk); + do { + get_random_bytes(&subflow->local_nonce, sizeof(u32)); + } while (!subflow->local_nonce); + + if (!local_id) { + err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk); + if (err < 0) + goto failed; + + local_id = err; + } + subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; @@ -1007,15 +994,16 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, if (loc->family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - sf->sk->sk_bound_dev_if = ifindex; + ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) goto failed; mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL); - pr_debug("msk=%p remote_token=%u", msk, remote_token); + pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token, + local_id); subflow->remote_token = remote_token; - subflow->local_id = loc->id; + subflow->local_id = local_id; subflow->request_join = 1; subflow->request_bkup = 1; mptcp_info2sockaddr(remote, &addr); @@ -1288,7 +1276,6 @@ void __init mptcp_subflow_init(void) subflow_specific.conn_request = subflow_v4_conn_request; subflow_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_specific.sk_rx_dst_set = subflow_finish_connect; - subflow_specific.rebuild_header = subflow_rebuild_header; #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; @@ -1298,7 +1285,6 @@ void __init mptcp_subflow_init(void) subflow_v6_specific.conn_request = subflow_v6_conn_request; subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect; - subflow_v6_specific.rebuild_header = subflow_rebuild_header; subflow_v6m_specific = subflow_v6_specific; subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit; -- cgit From ff91e9292fc5aafd9ee1dc44c03cff69a3b0f39f Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Tue, 30 Jun 2020 09:49:33 -0700 Subject: tcp: call tcp_ack_tstamp() when not fully acked When skb is coalesced tcp_ack_tstamp() still needs to be called when not fully acked in tcp_clean_rtx_queue(), otherwise SCM_TSTAMP_ACK timestamps may never be fired. Since the original patch series had dependent commits, this patch fixes the issue instead of reverting by restoring calls to tcp_ack_tstamp() when skb is not fully acked. Fixes: fdb7eb21ddd3 ("tcp: stamp SCM_TSTAMP_ACK later in tcp_clean_rtx_queue()") Signed-off-by: Yousuk Seung Signed-off-by: Willem de Bruijn Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8479b84f0a7f..12c26c9565b7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3172,8 +3172,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) tp->snd_up = tp->snd_una; - if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - flag |= FLAG_SACK_RENEGING; + if (skb) { + tcp_ack_tstamp(sk, skb, prior_snd_una); + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + flag |= FLAG_SACK_RENEGING; + } if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); -- cgit From fe189519e4d3ce655efa0bc64a6bed3f6f1ad1c2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 29 Jun 2020 14:03:39 +0200 Subject: net: devres: rename the release callback of devm_register_netdev() Make it an explicit counterpart to devm_register_netdev() just like we do with devm_free_netdev() for better clarity. Signed-off-by: Bartosz Golaszewski Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/devres.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/devres.c b/net/devres.c index 57a6a88d11f6..1f9be2133787 100644 --- a/net/devres.c +++ b/net/devres.c @@ -39,7 +39,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, } EXPORT_SYMBOL(devm_alloc_etherdev_mqs); -static void devm_netdev_release(struct device *dev, void *this) +static void devm_unregister_netdev(struct device *dev, void *this) { struct net_device_devres *res = this; @@ -77,7 +77,7 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev) netdev_devres_match, ndev))) return -EINVAL; - dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL); + dr = devres_alloc(devm_unregister_netdev, sizeof(*dr), GFP_KERNEL); if (!dr) return -ENOMEM; -- cgit From f9200a52eedfe96ae8d9cbf68a363f5409a46117 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 20 Jun 2020 13:03:55 +0300 Subject: ipvs: avoid expiring many connections from timer Add new functions ip_vs_conn_del() and ip_vs_conn_del_put() to release many IPVS connections in process context. They are suitable for connections found in table when we do not want to overload the timers. Currently, the change is useful for the dropentry delayed work but it will be used also in following patch when flushing connections to failed destinations. Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 53 +++++++++++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_ctl.c | 6 +++-- 2 files changed, 42 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 02f2f636798d..b3921ae92740 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -807,6 +807,31 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) kmem_cache_free(ip_vs_conn_cachep, cp); } +/* Try to delete connection while not holding reference */ +static void ip_vs_conn_del(struct ip_vs_conn *cp) +{ + if (del_timer(&cp->timer)) { + /* Drop cp->control chain too */ + if (cp->control) + cp->timeout = 0; + ip_vs_conn_expire(&cp->timer); + } +} + +/* Try to delete connection while holding reference */ +static void ip_vs_conn_del_put(struct ip_vs_conn *cp) +{ + if (del_timer(&cp->timer)) { + /* Drop cp->control chain too */ + if (cp->control) + cp->timeout = 0; + __ip_vs_conn_put(cp); + ip_vs_conn_expire(&cp->timer); + } else { + __ip_vs_conn_put(cp); + } +} + static void ip_vs_conn_expire(struct timer_list *t) { struct ip_vs_conn *cp = from_timer(cp, t, timer); @@ -827,14 +852,17 @@ static void ip_vs_conn_expire(struct timer_list *t) /* does anybody control me? */ if (ct) { + bool has_ref = !cp->timeout && __ip_vs_conn_get(ct); + ip_vs_control_del(cp); /* Drop CTL or non-assured TPL if not used anymore */ - if (!cp->timeout && !atomic_read(&ct->n_control) && + if (has_ref && !atomic_read(&ct->n_control) && (!(ct->flags & IP_VS_CONN_F_TEMPLATE) || !(ct->state & IP_VS_CTPL_S_ASSURED))) { IP_VS_DBG(4, "drop controlling connection\n"); - ct->timeout = 0; - ip_vs_conn_expire_now(ct); + ip_vs_conn_del_put(ct); + } else if (has_ref) { + __ip_vs_conn_put(ct); } } @@ -1317,8 +1345,7 @@ try_drop: drop: IP_VS_DBG(4, "drop connection\n"); - cp->timeout = 0; - ip_vs_conn_expire_now(cp); + ip_vs_conn_del(cp); } cond_resched_rcu(); } @@ -1341,19 +1368,15 @@ flush_again: hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (cp->ipvs != ipvs) continue; - /* As timers are expired in LIFO order, restart - * the timer of controlling connection first, so - * that it is expired after us. - */ + if (atomic_read(&cp->n_control)) + continue; cp_c = cp->control; - /* cp->control is valid only with reference to cp */ - if (cp_c && __ip_vs_conn_get(cp)) { + IP_VS_DBG(4, "del connection\n"); + ip_vs_conn_del(cp); + if (cp_c && !atomic_read(&cp_c->n_control)) { IP_VS_DBG(4, "del controlling connection\n"); - ip_vs_conn_expire_now(cp_c); - __ip_vs_conn_put(cp); + ip_vs_conn_del(cp_c); } - IP_VS_DBG(4, "del connection\n"); - ip_vs_conn_expire_now(cp); } cond_resched_rcu(); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0eed388c960b..4af83f466dfc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -224,7 +224,8 @@ static void defense_work_handler(struct work_struct *work) update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) ip_vs_random_dropentry(ipvs); - schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + queue_delayed_work(system_long_wq, &ipvs->defense_work, + DEFENSE_TIMER_PERIOD); } #endif @@ -4082,7 +4083,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); - schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + queue_delayed_work(system_long_wq, &ipvs->defense_work, + DEFENSE_TIMER_PERIOD); return 0; } -- cgit From 6b207d66aa9fad0deed13d5f824e1ea193b0a777 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 30 Jun 2020 10:29:10 -0700 Subject: bpf: Fix net/core/filter build errors when INET is not enabled Fix build errors when CONFIG_INET is not set/enabled. (.text+0x2b1b): undefined reference to `tcp_prot' (.text+0x2b3b): undefined reference to `tcp_prot' Signed-off-by: Randy Dunlap Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/b1a858ec-7e04-56bc-248a-62cb9bbee726@infradead.org --- net/core/filter.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c796e141ea8e..c5e696e6c315 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9328,8 +9328,10 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) { +#ifdef CONFIG_INET if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; +#endif #if IS_BUILTIN(CONFIG_IPV6) if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) @@ -9350,8 +9352,10 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = { BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) { +#ifdef CONFIG_INET if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; +#endif #if IS_BUILTIN(CONFIG_IPV6) if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) -- cgit From 2a6d6c31f136c136c05455788579586874a0525c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jul 2020 16:04:33 +0100 Subject: net/packet: remove redundant initialization of variable err The variable err is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 29bd405adbbd..7b436ebde61d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4293,7 +4293,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; __be16 num; - int err = -EINVAL; + int err; /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; -- cgit From b97e9d9d67c88bc413a3c27734d45d98d8d52b00 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Wed, 1 Jul 2020 16:01:52 -0700 Subject: net: sched: Allow changing default qdisc to FQ-PIE Similar to fq_codel and the other qdiscs that can set as default, fq_pie is also suitable for general use without explicit configuration, which makes it a valid choice for this. This is useful in situations where a painless out-of-the-box solution for reducing bufferbloat is desired but fq_codel is not necessarily the best choice. For example, fq_pie can be better for DASH streaming, but there could be more cases where it's the better choice of the two simple AQMs available in the kernel. Signed-off-by: Danny Lin Signed-off-by: David S. Miller --- net/sched/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 84badf00647e..a3b37d88800e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -468,6 +468,9 @@ choice config DEFAULT_FQ_CODEL bool "Fair Queue Controlled Delay" if NET_SCH_FQ_CODEL + config DEFAULT_FQ_PIE + bool "Flow Queue Proportional Integral controller Enhanced" if NET_SCH_FQ_PIE + config DEFAULT_SFQ bool "Stochastic Fair Queue" if NET_SCH_SFQ @@ -480,6 +483,7 @@ config DEFAULT_NET_SCH default "pfifo_fast" if DEFAULT_PFIFO_FAST default "fq" if DEFAULT_FQ default "fq_codel" if DEFAULT_FQ_CODEL + default "fq_pie" if DEFAULT_FQ_PIE default "sfq" if DEFAULT_SFQ default "pfifo_fast" endif -- cgit From a6b118febbab3f6454057612b355d0b667c1fafa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Jun 2020 21:24:45 +0200 Subject: mptcp: add receive buffer auto-tuning When mptcp is used, userspace doesn't read from the tcp (subflow) socket but from the parent (mptcp) socket receive queue. skbs are moved from the subflow socket to the mptcp rx queue either from 'data_ready' callback (if mptcp socket can be locked), a work queue, or the socket receive function. This means tcp_rcv_space_adjust() is never called and thus no receive buffer size auto-tuning is done. An earlier (not merged) patch added tcp_rcv_space_adjust() calls to the function that moves skbs from subflow to mptcp socket. While this enabled autotuning, it also meant tuning was done even if userspace was reading the mptcp socket very slowly. This adds mptcp_rcv_space_adjust() and calls it after userspace has read data from the mptcp socket rx queue. Its very similar to tcp_rcv_space_adjust, with two differences: 1. The rtt estimate is the largest one observed on a subflow 2. The rcvbuf size and window clamp of all subflows is adjusted to the mptcp-level rcvbuf. Otherwise, we get spurious drops at tcp (subflow) socket level if the skbs are not moved to the mptcp socket fast enough. Before: time mptcp_connect.sh -t -f $((4*1024*1024)) -d 300 -l 0.01% -r 0 -e "" -m mmap [..] ns4 MPTCP -> ns3 (10.0.3.2:10108 ) MPTCP (duration 40823ms) [ OK ] ns4 MPTCP -> ns3 (10.0.3.2:10109 ) TCP (duration 23119ms) [ OK ] ns4 TCP -> ns3 (10.0.3.2:10110 ) MPTCP (duration 5421ms) [ OK ] ns4 MPTCP -> ns3 (dead:beef:3::2:10111) MPTCP (duration 41446ms) [ OK ] ns4 MPTCP -> ns3 (dead:beef:3::2:10112) TCP (duration 23427ms) [ OK ] ns4 TCP -> ns3 (dead:beef:3::2:10113) MPTCP (duration 5426ms) [ OK ] Time: 1396 seconds After: ns4 MPTCP -> ns3 (10.0.3.2:10108 ) MPTCP (duration 5417ms) [ OK ] ns4 MPTCP -> ns3 (10.0.3.2:10109 ) TCP (duration 5427ms) [ OK ] ns4 TCP -> ns3 (10.0.3.2:10110 ) MPTCP (duration 5422ms) [ OK ] ns4 MPTCP -> ns3 (dead:beef:3::2:10111) MPTCP (duration 5415ms) [ OK ] ns4 MPTCP -> ns3 (dead:beef:3::2:10112) TCP (duration 5422ms) [ OK ] ns4 TCP -> ns3 (dead:beef:3::2:10113) MPTCP (duration 5423ms) [ OK ] Time: 296 seconds Signed-off-by: Florian Westphal Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++--- net/mptcp/protocol.h | 7 +++ net/mptcp/subflow.c | 5 ++- 3 files changed, 127 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 28ec26d97f96..fa137a9c42d1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -179,13 +179,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, return false; } - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); - - if (rcvbuf > sk->sk_rcvbuf) - sk->sk_rcvbuf = rcvbuf; - } - tp = tcp_sk(ssk); do { u32 map_remaining, offset; @@ -916,6 +909,100 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, return copied; } +/* receive buffer autotuning. See tcp_rcv_space_adjust for more information. + * + * Only difference: Use highest rtt estimate of the subflows in use. + */ +static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) +{ + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + u32 time, advmss = 1; + u64 rtt_us, mstamp; + + sock_owned_by_me(sk); + + if (copied <= 0) + return; + + msk->rcvq_space.copied += copied; + + mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC); + time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time); + + rtt_us = msk->rcvq_space.rtt_us; + if (rtt_us && time < (rtt_us >> 3)) + return; + + rtt_us = 0; + mptcp_for_each_subflow(msk, subflow) { + const struct tcp_sock *tp; + u64 sf_rtt_us; + u32 sf_advmss; + + tp = tcp_sk(mptcp_subflow_tcp_sock(subflow)); + + sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us); + sf_advmss = READ_ONCE(tp->advmss); + + rtt_us = max(sf_rtt_us, rtt_us); + advmss = max(sf_advmss, advmss); + } + + msk->rcvq_space.rtt_us = rtt_us; + if (time < (rtt_us >> 3) || rtt_us == 0) + return; + + if (msk->rcvq_space.copied <= msk->rcvq_space.space) + goto new_measure; + + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvmem, rcvbuf; + u64 rcvwin, grow; + + rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss; + + grow = rcvwin * (msk->rcvq_space.copied - msk->rcvq_space.space); + + do_div(grow, msk->rcvq_space.space); + rcvwin += (grow << 1); + + rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER); + while (tcp_win_from_space(sk, rcvmem) < advmss) + rcvmem += 128; + + do_div(rcvwin, advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + + if (rcvbuf > sk->sk_rcvbuf) { + u32 window_clamp; + + window_clamp = tcp_win_from_space(sk, rcvbuf); + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + + /* Make subflows follow along. If we do not do this, we + * get drops at subflow level if skbs can't be moved to + * the mptcp rx queue fast enough (announced rcv_win can + * exceed ssk->sk_rcvbuf). + */ + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(subflow); + WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); + tcp_sk(ssk)->window_clamp = window_clamp; + } + } + } + + msk->rcvq_space.space = msk->rcvq_space.copied; +new_measure: + msk->rcvq_space.copied = 0; + msk->rcvq_space.time = mstamp; +} + static bool __mptcp_move_skbs(struct mptcp_sock *msk) { unsigned int moved = 0; @@ -1028,6 +1115,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, set_bit(MPTCP_DATA_READY, &msk->flags); } out_err: + mptcp_rcv_space_adjust(msk, copied); + release_sock(sk); return copied; } @@ -1241,6 +1330,7 @@ static int mptcp_init_sock(struct sock *sk) return ret; sk_sockets_allocated_inc(sk); + sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2]; return 0; @@ -1423,6 +1513,22 @@ struct sock *mptcp_sk_clone(const struct sock *sk, return nsk; } +void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) +{ + const struct tcp_sock *tp = tcp_sk(ssk); + + msk->rcvq_space.copied = 0; + msk->rcvq_space.rtt_us = 0; + + msk->rcvq_space.time = tp->tcp_mstamp; + + /* initial rcv_space offering made to peer */ + msk->rcvq_space.space = min_t(u32, tp->rcv_wnd, + TCP_INIT_CWND * tp->advmss); + if (msk->rcvq_space.space == 0) + msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; +} + static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, bool kern) { @@ -1471,6 +1577,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, list_add(&subflow->node, &msk->conn_list); inet_sk_state_store(newsk, TCP_ESTABLISHED); + mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(new_mptcp_sock); __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); @@ -1631,6 +1738,8 @@ void mptcp_finish_connect(struct sock *ssk) atomic64_set(&msk->snd_una, msk->write_seq); mptcp_pm_new_connection(msk, 0); + + mptcp_rcv_space_init(msk, ssk); } static void mptcp_sock_graft(struct sock *sk, struct socket *parent) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1d05d9841b5c..a6412ff0fddb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -209,6 +209,12 @@ struct mptcp_sock { struct socket *subflow; /* outgoing connect/listener/!mp_capable */ struct sock *first; struct mptcp_pm_data pm; + struct { + u32 space; /* bytes copied in last measurement window */ + u32 copied; /* bytes copied in this measurement window */ + u64 time; /* start time of measurement window */ + u64 rtt_us; /* last maximum rtt of subflows */ + } rcvq_space; }; #define mptcp_for_each_subflow(__msk, __subflow) \ @@ -369,6 +375,7 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); +void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 664aa9158363..e1e19c76e267 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -225,8 +225,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) pr_fallback(mptcp_sk(subflow->conn)); } - if (mptcp_check_fallback(sk)) + if (mptcp_check_fallback(sk)) { + mptcp_rcv_space_init(mptcp_sk(parent), sk); return; + } if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), @@ -1118,6 +1120,7 @@ static void subflow_state_change(struct sock *sk) if (subflow_simultaneous_connect(sk)) { mptcp_do_fallback(sk); + mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); subflow->conn_finished = 1; if (inet_sk_state_load(parent) == TCP_SYN_SENT) { -- cgit From df42ef227dc4fdb7acc8d755e029ed27a2e814f8 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:06 +0200 Subject: bridge: mrp: Add br_mrp_fill_info Add the function br_mrp_fill_info which populates the MRP attributes regarding the status of each MRP instance. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp_netlink.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 7 +++++ 2 files changed, 71 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 34b3a8776991..c4f5c356811f 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -304,6 +304,70 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, return 0; } +int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) +{ + struct nlattr *tb, *mrp_tb; + struct br_mrp *mrp; + + mrp_tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP); + if (!mrp_tb) + return -EMSGSIZE; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list) { + struct net_bridge_port *p; + + tb = nla_nest_start_noflag(skb, IFLA_BRIDGE_MRP_INFO); + if (!tb) + goto nla_info_failure; + + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ID, + mrp->ring_id)) + goto nla_put_failure; + + p = rcu_dereference(mrp->p_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_P_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + + p = rcu_dereference(mrp->s_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_S_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO, + mrp->prio)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_STATE, + mrp->ring_state)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_RING_ROLE, + mrp->ring_role)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_INTERVAL, + mrp->test_interval)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MAX_MISS, + mrp->test_max_miss)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_TEST_MONITOR, + mrp->test_monitor)) + goto nla_put_failure; + + nla_nest_end(skb, tb); + } + nla_nest_end(skb, mrp_tb); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tb); + +nla_info_failure: + nla_nest_cancel(skb, mrp_tb); + + return -EMSGSIZE; +} + int br_mrp_port_open(struct net_device *dev, u8 loc) { struct net_bridge_port *p; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6a7d8e218ae7..65d2c163a24a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1317,6 +1317,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb); bool br_mrp_enabled(struct net_bridge *br); void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p); +int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br); #else static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, @@ -1339,6 +1340,12 @@ static inline void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p) { } + +static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) +{ + return 0; +} + #endif /* br_netlink.c */ -- cgit From 36a8e8e26542056bbd7eb5e047cadee30587d230 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 2 Jul 2020 10:13:07 +0200 Subject: bridge: Extend br_fill_ifinfo to return MPR status This patch extends the function br_fill_ifinfo to return also the MRP status for each instance on a bridge. It also adds a new filter RTEXT_FILTER_MRP to return the MRP status only when this is set, not to interfer with the vlans. The MRP status is return only on the bridge interfaces. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 240e260e3461..c532fa65c983 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -453,6 +453,28 @@ static int br_fill_ifinfo(struct sk_buff *skb, rcu_read_unlock(); if (err) goto nla_put_failure; + + nla_nest_end(skb, af); + } + + if (filter_mask & RTEXT_FILTER_MRP) { + struct nlattr *af; + int err; + + if (!br_mrp_enabled(br) || port) + goto done; + + af = nla_nest_start_noflag(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + rcu_read_lock(); + err = br_mrp_fill_info(skb, br); + rcu_read_unlock(); + + if (err) + goto nla_put_failure; + nla_nest_end(skb, af); } @@ -516,7 +538,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_bridge_port *port = br_port_get_rtnl(dev); if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN) && - !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) + !(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) && + !(filter_mask & RTEXT_FILTER_MRP)) return 0; return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags, -- cgit From cd8700e45e73c5e5c32260ebc3092e50c1853e5d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 3 Jul 2020 16:43:08 -0400 Subject: ipv6/ping: set skb->mark on icmpv6 sockets IPv6 ping sockets route based on fwmark, but do not yet set skb->mark. Add this. IPv4 ping sockets also do both. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv6/ping.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 98ac32b49d8c..6caa062f68e7 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -114,6 +114,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); ipcm6_init_sk(&ipc6, np); + ipc6.sockc.mark = sk->sk_mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false); -- cgit From f0a5e4d7a594e0fe237d3dfafb069bb82f80f42f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 1 Jul 2020 18:17:19 +0300 Subject: ipvs: allow connection reuse for unconfirmed conntrack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit YangYuxi is reporting that connection reuse is causing one-second delay when SYN hits existing connection in TIME_WAIT state. Such delay was added to give time to expire both the IPVS connection and the corresponding conntrack. This was considered a rare case at that time but it is causing problem for some environments such as Kubernetes. As nf_conntrack_tcp_packet() can decide to release the conntrack in TIME_WAIT state and to replace it with a fresh NEW conntrack, we can use this to allow rescheduling just by tuning our check: if the conntrack is confirmed we can not schedule it to different real server and the one-second delay still applies but if new conntrack was created, we are free to select new real server without any delays. YangYuxi lists some of the problem reports: - One second connection delay in masquerading mode: https://marc.info/?t=151683118100004&r=1&w=2 - IPVS low throughput #70747 https://github.com/kubernetes/kubernetes/issues/70747 - Apache Bench can fill up ipvs service proxy in seconds #544 https://github.com/cloudnativelabs/kube-router/issues/544 - Additional 1s latency in `host -> service IP -> pod` https://github.com/kubernetes/kubernetes/issues/90854 Fixes: f719e3754ee2 ("ipvs: drop first packet to redirect conntrack") Co-developed-by: YangYuxi Signed-off-by: YangYuxi Signed-off-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index ca3670152565..b4a6b7662f3f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2066,14 +2066,14 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { - bool uses_ct = false, resched = false; + bool old_ct = false, resched = false; if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && unlikely(!atomic_read(&cp->dest->weight))) { resched = true; - uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); } else if (is_new_conn_expected(cp, conn_reuse_mode)) { - uses_ct = ip_vs_conn_uses_conntrack(cp, skb); + old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!atomic_read(&cp->n_control)) { resched = true; } else { @@ -2081,15 +2081,17 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int * that uses conntrack while it is still * referenced by controlled connection(s). */ - resched = !uses_ct; + resched = !old_ct; } } if (resched) { + if (!old_ct) + cp->flags &= ~IP_VS_CONN_F_NFCT; if (!atomic_read(&cp->n_control)) ip_vs_conn_expire_now(cp); __ip_vs_conn_put(cp); - if (uses_ct) + if (old_ct) return NF_DROP; cp = NULL; } -- cgit From 74cccc3d38438b346e40a4f8133cff3f0839ff84 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:11 +0200 Subject: netfilter: nf_tables: add NFTA_CHAIN_ID attribute This netlink attribute allows you to refer to chains inside a transaction as an alternative to the name and the handle. The chain binding support requires this new chain ID approach. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7647ecfa0d40..650ef0dd0773 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -280,9 +280,15 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) if (trans == NULL) return ERR_PTR(-ENOMEM); - if (msg_type == NFT_MSG_NEWCHAIN) + if (msg_type == NFT_MSG_NEWCHAIN) { nft_activate_next(ctx->net, ctx->chain); + if (ctx->nla[NFTA_CHAIN_ID]) { + nft_trans_chain_id(trans) = + ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); + } + } + list_add_tail(&trans->list, &ctx->net->nft.commit_list); return trans; } @@ -1274,6 +1280,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, + [NFTA_CHAIN_ID] = { .type = NLA_U32 }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -2154,9 +2161,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; + struct nft_chain *chain = NULL; const struct nlattr *attr; struct nft_table *table; - struct nft_chain *chain; u8 policy = NF_ACCEPT; struct nft_ctx ctx; u64 handle = 0; @@ -2181,7 +2188,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return PTR_ERR(chain); } attr = nla[NFTA_CHAIN_HANDLE]; - } else { + } else if (nla[NFTA_CHAIN_NAME]) { chain = nft_chain_lookup(net, table, attr, genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) != -ENOENT) { @@ -2190,6 +2197,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, } chain = NULL; } + } else if (!nla[NFTA_CHAIN_ID]) { + return -EINVAL; } if (nla[NFTA_CHAIN_POLICY]) { -- cgit From 837830a4b439bfeb86c70b0115c280377c84714b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:16 +0200 Subject: netfilter: nf_tables: add NFTA_RULE_CHAIN_ID attribute This new netlink attribute allows you to add rules to chains by the chain ID. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 650ef0dd0773..fbe8f9209813 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2153,6 +2153,22 @@ err: return err; } +static struct nft_chain *nft_chain_lookup_byid(const struct net *net, + const struct nlattr *nla) +{ + u32 id = ntohl(nla_get_be32(nla)); + struct nft_trans *trans; + + list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_chain *chain = trans->ctx.chain; + + if (trans->msg_type == NFT_MSG_NEWCHAIN && + id == nft_trans_chain_id(trans)) + return chain; + } + return ERR_PTR(-ENOENT); +} + static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -2633,6 +2649,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { .len = NFT_USERDATA_MAXLEN }, [NFTA_RULE_ID] = { .type = NLA_U32 }, [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 }, + [NFTA_RULE_CHAIN_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, @@ -3039,10 +3056,21 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return PTR_ERR(table); } - chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); - if (IS_ERR(chain)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); - return PTR_ERR(chain); + if (nla[NFTA_RULE_CHAIN]) { + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], + genmask); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); + return PTR_ERR(chain); + } + } else if (nla[NFTA_RULE_CHAIN_ID]) { + chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]); + if (IS_ERR(chain)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); + return PTR_ERR(chain); + } + } else { + return -EINVAL; } if (nla[NFTA_RULE_HANDLE]) { -- cgit From 51d70f181ff4e2c996ddf256af1efecd7d5864e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:21 +0200 Subject: netfilter: nf_tables: add NFTA_VERDICT_CHAIN_ID attribute This netlink attribute allows you to identify the chain to jump/goto by means of the chain ID. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fbe8f9209813..d86602797a69 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8242,6 +8242,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, + [NFTA_VERDICT_CHAIN_ID] = { .type = NLA_U32 }, }; static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, @@ -8278,10 +8279,19 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, break; case NFT_JUMP: case NFT_GOTO: - if (!tb[NFTA_VERDICT_CHAIN]) + if (tb[NFTA_VERDICT_CHAIN]) { + chain = nft_chain_lookup(ctx->net, ctx->table, + tb[NFTA_VERDICT_CHAIN], + genmask); + } else if (tb[NFTA_VERDICT_CHAIN_ID]) { + chain = nft_chain_lookup_byid(ctx->net, + tb[NFTA_VERDICT_CHAIN_ID]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { return -EINVAL; - chain = nft_chain_lookup(ctx->net, ctx->table, - tb[NFTA_VERDICT_CHAIN], genmask); + } + if (IS_ERR(chain)) return PTR_ERR(chain); if (nft_is_base_chain(chain)) -- cgit From 67c49de4ad862c567088c5119cf125e566f56e7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:25 +0200 Subject: netfilter: nf_tables: expose enum nft_chain_flags through UAPI This enum definition was never exposed through UAPI. Rename NFT_BASE_CHAIN to NFT_CHAIN_BASE for consistency. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d86602797a69..b7582a1c8dce 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1903,7 +1903,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, nft_basechain_hook_init(&basechain->ops, family, hook, chain); } - chain->flags |= NFT_BASE_CHAIN | flags; + chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; if (chain->flags & NFT_CHAIN_HW_OFFLOAD && nft_chain_offload_priority(basechain) < 0) @@ -2255,7 +2255,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - flags |= chain->flags & NFT_BASE_CHAIN; + flags |= chain->flags & NFT_CHAIN_BASE; return nf_tables_updchain(&ctx, genmask, policy, flags); } -- cgit From 04b7db414490ea9254d0c1d8930ea9571f8ce9f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:28 +0200 Subject: netfilter: nf_tables: add nft_chain_add() This patch adds a helper function to add the chain to the hashtable and the chain list. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7582a1c8dce..a7cb9c07802b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1914,6 +1914,20 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } +static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +{ + int err; + + err = rhltable_insert_key(&table->chains_ht, chain->name, + &chain->rhlhead, nft_chain_ht_params); + if (err) + return err; + + list_add_tail_rcu(&chain->list, &table->chains); + + return 0; +} + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, u32 flags) { @@ -1991,16 +2005,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) goto err1; - err = rhltable_insert_key(&table->chains_ht, chain->name, - &chain->rhlhead, nft_chain_ht_params); - if (err) - goto err2; - trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - rhltable_remove(&table->chains_ht, &chain->rhlhead, - nft_chain_ht_params); goto err2; } @@ -2008,8 +2015,13 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nft_is_base_chain(chain)) nft_trans_chain_policy(trans) = policy; + err = nft_chain_add(table, chain); + if (err < 0) { + nft_trans_destroy(trans); + goto err2; + } + table->use++; - list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: -- cgit From d0e2c7de92c7f2b3d355ad76b0bb9fc43d1beb87 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Jun 2020 19:21:36 +0200 Subject: netfilter: nf_tables: add NFT_CHAIN_BINDING This new chain flag specifies that: * the kernel dynamically allocates the chain name, if no chain name is specified. * If the immediate expression that refers to this chain is removed, then this bound chain (and its content) is destroyed. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 86 +++++++++++++++++++++++++++++++++++++------ net/netfilter/nft_immediate.c | 51 +++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a7cb9c07802b..b8a970dad213 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1056,6 +1056,9 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; + if (nft_chain_is_bound(chain)) + continue; + ctx->chain = chain; err = nft_delrule_by_chain(ctx); @@ -1098,6 +1101,9 @@ static int nft_flush_table(struct nft_ctx *ctx) if (!nft_is_active_next(ctx->net, chain)) continue; + if (nft_chain_is_bound(chain)) + continue; + ctx->chain = chain; err = nft_delchain(ctx); @@ -1413,13 +1419,12 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, lockdep_commit_lock_is_held(net)); if (nft_dump_stats(skb, stats)) goto nla_put_failure; - - if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) && - nla_put_be32(skb, NFTA_CHAIN_FLAGS, - htonl(NFT_CHAIN_HW_OFFLOAD))) - goto nla_put_failure; } + if (chain->flags && + nla_put_be32(skb, NFTA_CHAIN_FLAGS, htonl(chain->flags))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; @@ -1621,7 +1626,7 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->rules_next); } -static void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_ctx *ctx) { struct nft_chain *chain = ctx->chain; struct nft_hook *hook, *next; @@ -1928,6 +1933,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) return 0; } +static u64 chain_id; + static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, u8 policy, u32 flags) { @@ -1936,6 +1943,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_base_chain *basechain; struct nft_stats __percpu *stats; struct net *net = ctx->net; + char name[NFT_NAME_MAXLEN]; struct nft_trans *trans; struct nft_chain *chain; struct nft_rule **rules; @@ -1947,6 +1955,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (nla[NFTA_CHAIN_HOOK]) { struct nft_chain_hook hook; + if (flags & NFT_CHAIN_BINDING) + return -EOPNOTSUPP; + err = nft_chain_parse_hook(net, nla, &hook, family, true); if (err < 0) return err; @@ -1976,16 +1987,33 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return err; } } else { + if (flags & NFT_CHAIN_BASE) + return -EINVAL; + if (flags & NFT_CHAIN_HW_OFFLOAD) + return -EOPNOTSUPP; + chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (chain == NULL) return -ENOMEM; + + chain->flags = flags; } ctx->chain = chain; INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->table = table; - chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); + + if (nla[NFTA_CHAIN_NAME]) { + chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); + } else { + if (!(flags & NFT_CHAIN_BINDING)) + return -EINVAL; + + snprintf(name, sizeof(name), "__chain%llu", ++chain_id); + chain->name = kstrdup(name, GFP_KERNEL); + } + if (!chain->name) { err = -ENOMEM; goto err1; @@ -2976,8 +3004,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -static void nf_tables_rule_release(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -3075,6 +3102,9 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } + if (nft_chain_is_bound(chain)) + return -EOPNOTSUPP; + } else if (nla[NFTA_RULE_CHAIN_ID]) { chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]); if (IS_ERR(chain)) { @@ -3294,6 +3324,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } + if (nft_chain_is_bound(chain)) + return -EOPNOTSUPP; } nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); @@ -5330,11 +5362,24 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, */ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { + struct nft_chain *chain; + struct nft_rule *rule; + if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use++; + chain = data->verdict.chain; + chain->use++; + + if (!nft_chain_is_bound(chain)) + break; + + chain->table->use++; + list_for_each_entry(rule, &chain->rules, list) + chain->use++; + + nft_chain_add(chain->table, chain); break; } } @@ -7474,7 +7519,7 @@ static void nft_obj_del(struct nft_object *obj) list_del_rcu(&obj->list); } -static void nft_chain_del(struct nft_chain *chain) +void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; @@ -7825,6 +7870,10 @@ static int __nf_tables_abort(struct net *net, bool autoload) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { + if (nft_chain_is_bound(trans->ctx.chain)) { + nft_trans_destroy(trans); + break; + } trans->ctx.table->use--; nft_chain_del(trans->ctx.chain); nf_tables_unregister_hook(trans->ctx.net, @@ -8321,10 +8370,23 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { + struct nft_chain *chain; + struct nft_rule *rule; + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->verdict.chain->use--; + chain = data->verdict.chain; + chain->use--; + + if (!nft_chain_is_bound(chain)) + break; + + chain->table->use--; + list_for_each_entry(rule, &chain->rules, list) + chain->use--; + + nft_chain_del(chain); break; } } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c7f0ef73d939..9e556638bb32 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -54,6 +54,23 @@ static int nft_immediate_init(const struct nft_ctx *ctx, if (err < 0) goto err1; + if (priv->dreg == NFT_REG_VERDICT) { + struct nft_chain *chain = priv->data.verdict.chain; + + switch (priv->data.verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + if (nft_chain_is_bound(chain)) { + err = -EBUSY; + goto err1; + } + chain->bound = true; + break; + default: + break; + } + } + return 0; err1: @@ -81,6 +98,39 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } +static void nft_immediate_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg != NFT_REG_VERDICT) + return; + + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + + if (!nft_chain_is_bound(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nf_tables_rule_release(&chain_ctx, rule); + + nf_tables_chain_destroy(&chain_ctx); + break; + default: + break; + } +} + static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); @@ -170,6 +220,7 @@ static const struct nft_expr_ops nft_imm_ops = { .init = nft_immediate_init, .activate = nft_immediate_activate, .deactivate = nft_immediate_deactivate, + .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .validate = nft_immediate_validate, .offload = nft_immediate_offload, -- cgit From c1f79a2eefdcc0aef5d7a911c27a3f75f1936ecd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Jul 2020 02:51:28 +0200 Subject: netfilter: nf_tables: reject unsupported chain flags Bail out if userspace sends unsupported chain flags. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b8a970dad213..f96785586f64 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2285,6 +2285,9 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, else if (chain) flags = chain->flags; + if (flags & ~NFT_CHAIN_FLAGS) + return -EOPNOTSUPP; + nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla); if (chain != NULL) { -- cgit From 83f0c10bc36f956102ce4a33c5fe596ae9891297 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 5 Jul 2020 01:30:15 +0200 Subject: net: use mptcp setsockopt function for SOL_SOCKET on mptcp sockets setsockopt(mptcp_fd, SOL_SOCKET, ...)... appears to work (returns 0), but it has no effect -- this is because the MPTCP layer never has a chance to copy the settings to the subflow socket. Skip the generic handling for the mptcp case and instead call the mptcp specific handler instead for SOL_SOCKET too. Next patch adds more specific handling for SOL_SOCKET to mptcp. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 3 +++ net/socket.c | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index fa137a9c42d1..320f306ea85c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1609,6 +1609,9 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, pr_debug("msk=%p", msk); + if (level == SOL_SOCKET) + return sock_setsockopt(sk->sk_socket, level, optname, optval, optlen); + /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the * MPTCP-level socket to configure the subflows until the subflow diff --git a/net/socket.c b/net/socket.c index 976426d03f09..d87812a9ed4b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2080,6 +2080,17 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } +static bool sock_use_custom_sol_socket(const struct socket *sock) +{ + const struct sock *sk = sock->sk; + + /* Use sock->ops->setsockopt() for MPTCP */ + return IS_ENABLED(CONFIG_MPTCP) && + sk->sk_protocol == IPPROTO_MPTCP && + sk->sk_type == SOCK_STREAM && + (sk->sk_family == AF_INET || sk->sk_family == AF_INET6); +} + /* * Set a socket option. Because we don't know the option lengths we have * to pass the user mode parameter for the protocols to sort out. @@ -2118,7 +2129,7 @@ static int __sys_setsockopt(int fd, int level, int optname, optval = (char __user __force *)kernel_optval; } - if (level == SOL_SOCKET) + if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) err = sock_setsockopt(sock, level, optname, optval, optlen); -- cgit From fd1452d8ef988d228f5265147fde1017084404e4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 5 Jul 2020 01:30:16 +0200 Subject: mptcp: add REUSEADDR/REUSEPORT support This will e.g. make 'sshd restart' work when MPTCP is used, as we will now set this option on the listener socket instead of only the mptcp socket (where it has no effect). We still need to copy the setting to the master socket so that a subsequent getsockopt() returns the expected value. Reported-by: Christoph Paasch Suggested-by: Paolo Abeni Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 320f306ea85c..612f6d49f1bb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1601,6 +1601,37 @@ static void mptcp_destroy(struct sock *sk) sk_sockets_allocated_dec(sk); } +static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + struct socket *ssock; + int ret; + + switch (optname) { + case SO_REUSEPORT: + case SO_REUSEADDR: + lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + release_sock(sk); + return -EINVAL; + } + + ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); + if (ret == 0) { + if (optname == SO_REUSEPORT) + sk->sk_reuseport = ssock->sk->sk_reuseport; + else if (optname == SO_REUSEADDR) + sk->sk_reuse = ssock->sk->sk_reuse; + } + release_sock(sk); + return ret; + } + + return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); +} + static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1610,7 +1641,7 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, pr_debug("msk=%p", msk); if (level == SOL_SOCKET) - return sock_setsockopt(sk->sk_socket, level, optname, optval, optlen); + return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); /* @@ the meaning of setsockopt() when the socket is connected and * there are multiple subflows is not yet defined. It is up to the -- cgit From c9b95a13598750e2840d99322f844ec0ff9e6246 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 5 Jul 2020 01:30:17 +0200 Subject: mptcp: support IPV6_V6ONLY setsockopt Without this, Opensshd fails to open an ipv6 socket listening socket: error: setsockopt IPV6_V6ONLY: Operation not supported error: Bind to port 22 on :: failed: Address already in use. Opensshd opens an ipv4 and and ipv6 listening socket, but because IPV6_V6ONLY setsockopt fails, the port number is already in use. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 612f6d49f1bb..3ab060e30038 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1632,6 +1632,33 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); } +static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = (struct sock *)msk; + int ret = -EOPNOTSUPP; + struct socket *ssock; + + switch (optname) { + case IPV6_V6ONLY: + lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + release_sock(sk); + return -EINVAL; + } + + ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); + if (ret == 0) + sk->sk_ipv6only = ssock->sk->sk_ipv6only; + + release_sock(sk); + break; + } + + return ret; +} + static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1655,6 +1682,9 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, if (ssk) return tcp_setsockopt(ssk, level, optname, optval, optlen); + if (level == SOL_IPV6) + return mptcp_setsockopt_v6(msk, optname, optval, optlen); + return -EOPNOTSUPP; } -- cgit From a61bf20831d7fc77395d97777a9b511790fff621 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 5 Jul 2020 21:30:04 +0200 Subject: net: dsa: Add __percpu property to prevent warnings net/dsa/slave.c:505:13: warning: incorrect type in initializer (different address spaces) net/dsa/slave.c:505:13: expected void const [noderef] *__vpp_verify net/dsa/slave.c:505:13: got struct pcpu_sw_netstats * Add the needed _percpu property to prevent this warning. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index adecf73bd608..1653e3377cb3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -77,7 +77,7 @@ struct dsa_slave_priv { struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); - struct pcpu_sw_netstats *stats64; + struct pcpu_sw_netstats __percpu *stats64; struct gro_cells gcells; -- cgit From ed6444ea03841d6a24093eb25dca2eecc1860ca9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 5 Jul 2020 21:30:05 +0200 Subject: net: dsa: tag_ksz: Fix __be16 warnings cpu_to_be16 returns a __be16 value. So what it is assigned to needs to have the same type to avoid warnings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_ksz.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 90d055c4df9e..bd1a3158d79a 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -156,8 +156,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, { struct dsa_port *dp = dsa_slave_to_port(dev); struct sk_buff *nskb; - u16 *tag; + __be16 *tag; u8 *addr; + u16 val; nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN); if (!nskb) @@ -167,12 +168,12 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN); addr = skb_mac_header(nskb); - *tag = BIT(dp->index); + val = BIT(dp->index); if (is_link_local_ether_addr(addr)) - *tag |= KSZ9477_TAIL_TAG_OVERRIDE; + val |= KSZ9477_TAIL_TAG_OVERRIDE; - *tag = cpu_to_be16(*tag); + *tag = cpu_to_be16(val); return nskb; } -- cgit From 802734ad7753294dae69c431c45e0503dc33e97a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 5 Jul 2020 21:30:06 +0200 Subject: net: dsa: tag_lan9303: Fix __be16 warnings net/dsa/tag_lan9303.c:76:24: warning: incorrect type in assignment (different base types) net/dsa/tag_lan9303.c:76:24: expected unsigned short [usertype] net/dsa/tag_lan9303.c:76:24: got restricted __be16 [usertype] net/dsa/tag_lan9303.c:80:24: warning: incorrect type in assignment (different base types) net/dsa/tag_lan9303.c:80:24: expected unsigned short [usertype] net/dsa/tag_lan9303.c:80:24: got restricted __be16 [usertype] net/dsa/tag_lan9303.c:106:31: warning: restricted __be16 degrades to integer net/dsa/tag_lan9303.c:111:24: warning: cast to restricted __be16 net/dsa/tag_lan9303.c:111:24: warning: cast to restricted __be16 net/dsa/tag_lan9303.c:111:24: warning: cast to restricted __be16 net/dsa/tag_lan9303.c:111:24: warning: cast to restricted __be16 Make use of __be16 where appropriate to fix these warnings. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_lan9303.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index eb0e7a32e53d..ccfb6f641bbf 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -55,7 +55,8 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - u16 *lan9303_tag; + __be16 *lan9303_tag; + u16 tag; /* insert a special VLAN tag between the MAC addresses * and the current ethertype field. @@ -72,12 +73,12 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) /* make room between MACs and Ether-Type */ memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN); - lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); + lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN); + tag = lan9303_xmit_use_arl(dp, skb->data) ? + LAN9303_TAG_TX_USE_ALR : + dp->index | LAN9303_TAG_TX_STP_OVERRIDE; lan9303_tag[0] = htons(ETH_P_8021Q); - lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ? - LAN9303_TAG_TX_USE_ALR : - dp->index | LAN9303_TAG_TX_STP_OVERRIDE; - lan9303_tag[1] = htons(lan9303_tag[1]); + lan9303_tag[1] = htons(tag); return skb; } @@ -85,7 +86,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - u16 *lan9303_tag; + __be16 *lan9303_tag; u16 lan9303_tag1; unsigned int source_port; @@ -101,7 +102,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, * ^ * ->data */ - lan9303_tag = (u16 *)(skb->data - 2); + lan9303_tag = (__be16 *)(skb->data - 2); if (lan9303_tag[0] != htons(ETH_P_8021Q)) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n"); -- cgit From 04d63f9d91d018d4ec31832677f4045706defc18 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 5 Jul 2020 21:30:07 +0200 Subject: net: dsa: tag_mtk: Fix warnings for __be16 net/dsa/tag_mtk.c:84:13: warning: incorrect type in assignment (different base types) net/dsa/tag_mtk.c:84:13: expected restricted __be16 [usertype] hdr net/dsa/tag_mtk.c:84:13: got int net/dsa/tag_mtk.c:94:17: warning: restricted __be16 degrades to integer The result of a ntohs() is not __be16, but u16. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_mtk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index d6619edd53e5..f602fc758d68 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -67,8 +67,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { + u16 hdr; int port; - __be16 *phdr, hdr; + __be16 *phdr; unsigned char *dest = eth_hdr(skb)->h_dest; bool is_multicast_skb = is_multicast_ether_addr(dest) && !is_broadcast_ether_addr(dest); -- cgit From 99bac53d069a05dc680b3aa82614f964bc12f5f9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 5 Jul 2020 21:30:08 +0200 Subject: net: dsa: tag_qca.c: Fix warning for __be16 vs u16 net/dsa/tag_qca.c:48:15: warning: incorrect type in assignment (different base types) net/dsa/tag_qca.c:48:15: expected unsigned short [usertype] net/dsa/tag_qca.c:48:15: got restricted __be16 [usertype] net/dsa/tag_qca.c:68:13: warning: incorrect type in assignment (different base types) net/dsa/tag_qca.c:68:13: expected restricted __be16 [usertype] hdr net/dsa/tag_qca.c:68:13: got int net/dsa/tag_qca.c:71:16: warning: restricted __be16 degrades to integer net/dsa/tag_qca.c:81:17: warning: restricted __be16 degrades to integer Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_qca.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 70db7c909f74..7066f5e697d7 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -31,7 +31,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - u16 *phdr, hdr; + __be16 *phdr; + u16 hdr; if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; @@ -39,7 +40,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) skb_push(skb, QCA_HDR_LEN); memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); - phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + phdr = (__be16 *)(skb->data + 2 * ETH_ALEN); /* Set the version field, and set destination port information */ hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | @@ -54,8 +55,9 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u8 ver; + u16 hdr; int port; - __be16 *phdr, hdr; + __be16 *phdr; if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) return NULL; -- cgit From 7a6498ebcdc0fde4613a20ae405481d49166e8bb Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 6 Jul 2020 19:38:50 +0200 Subject: Replace HTTP links with HTTPS ones: IPv* Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 8 ++++---- net/ipv4/cipso_ipv4.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_veno.c | 2 +- net/ipv6/Kconfig | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e64e59b536d3..60db5a6487cc 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -10,7 +10,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - . For most people, it's safe to say N. + . For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -73,7 +73,7 @@ config IP_MULTIPLE_TABLES If you need more information, see the Linux Advanced Routing and Traffic Control documentation at - + If unsure, say N. @@ -280,7 +280,7 @@ config SYN_COOKIES continue to connect, even when your machine is under attack. There is no need for the legitimate users to change their TCP/IP software; SYN cookies work transparently to them. For technical information - about SYN cookies, check out . + about SYN cookies, check out . If you are SYN flooded, the source address reported by the kernel is likely to have been forged by the attacker; it is only reported as @@ -525,7 +525,7 @@ config TCP_CONG_HSTCP A modification to TCP's congestion control mechanism for use with large congestion windows. A table indicates how much to increase the congestion window by when an ACK is received. - For more detail see http://www.icir.org/floyd/hstcp.html + For more detail see https://www.icir.org/floyd/hstcp.html config TCP_CONG_HYBLA tristate "TCP-Hybla congestion control algorithm" diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a23094b050f8..0f1b9065c0a6 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -10,9 +10,9 @@ * * The CIPSO draft specification can be found in the kernel's Documentation * directory as well as the following URL: - * http://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt + * https://tools.ietf.org/id/draft-ietf-cipso-ipsecurity-01.txt * The FIPS-188 specification can be found at the following URL: - * http://www.itl.nist.gov/fipspubs/fip188.htm + * https://www.itl.nist.gov/fipspubs/fip188.htm * * Author: Paul Moore */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 248f1c1959a6..dcb0802a47d5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -13,7 +13,7 @@ * * An experimental study of compression methods for dynamic tries * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - * http://www.csc.kth.se/~snilsson/software/dyntrie2/ + * https://www.csc.kth.se/~snilsson/software/dyntrie2/ * * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f8755a4ae9d4..a8b980ad11d4 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -3,7 +3,7 @@ * (C) 2003-2004 by Harald Welte * based on ideas of Fabio Olive Leite * - * Development of this code funded by SuSE Linux AG, http://www.suse.com/ + * Development of this code funded by SuSE Linux AG, https://www.suse.com/ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index bfdfbb972c57..349069d6cd0a 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -2,7 +2,7 @@ /* * Sally Floyd's High Speed TCP (RFC 3649) congestion control * - * See http://www.icir.org/floyd/hstcp.html + * See https://www.icir.org/floyd/hstcp.html * * John Heffner */ diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 88e1f011afe0..55adcfcf96fe 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -4,7 +4,7 @@ * R.N.Shorten, D.J.Leith: * "H-TCP: TCP for high-speed and long-distance networks" * Proc. PFLDnet, Argonne, 2004. - * http://www.hamilton.ie/net/htcp3.pdf + * https://www.hamilton.ie/net/htcp3.pdf */ #include diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 12c26c9565b7..dc77309ea15b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -518,7 +518,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss); * * The algorithm for RTT estimation w/o timestamps is based on * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL. - * + * * * More detail on this code can be found at * , diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 50a9a6e2c4cd..cd50a61c9976 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -7,7 +7,7 @@ * "TCP Veno: TCP Enhancement for Transmission over Wireless Access Networks." * IEEE Journal on Selected Areas in Communication, * Feb. 2003. - * See http://www.ie.cuhk.edu.hk/fileadmin/staff_upload/soung/Journal/J3.pdf + * See https://www.ie.cuhk.edu.hk/fileadmin/staff_upload/soung/Journal/J3.pdf */ #include diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index f4f19e89af5e..76bff79d6fed 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -14,7 +14,7 @@ menuconfig IPV6 . For specific information about IPv6 under Linux, see Documentation/networking/ipv6.rst and read the HOWTO at - + To compile this protocol support as a module, choose M here: the module will be called ipv6. -- cgit From d47a72152097d7be7cfc453d205196c0aa976c33 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 6 Jul 2020 21:06:12 +0200 Subject: mptcp: fix race in subflow_data_ready() syzkaller was able to make the kernel reach subflow_data_ready() for a server subflow that was closed before subflow_finish_connect() completed. In these cases we can avoid using the path for regular/fallback MPTCP data, and just wake the main socket, to avoid the following warning: WARNING: CPU: 0 PID: 9370 at net/mptcp/subflow.c:885 subflow_data_ready+0x1e6/0x290 net/mptcp/subflow.c:885 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 9370 Comm: syz-executor.0 Not tainted 5.7.0 #106 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xb7/0xfe lib/dump_stack.c:118 panic+0x29e/0x692 kernel/panic.c:221 __warn.cold+0x2f/0x3d kernel/panic.c:582 report_bug+0x28b/0x2f0 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:105 [inline] fixup_bug arch/x86/kernel/traps.c:100 [inline] do_error_trap+0x10f/0x180 arch/x86/kernel/traps.c:197 do_invalid_op+0x32/0x40 arch/x86/kernel/traps.c:216 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:subflow_data_ready+0x1e6/0x290 net/mptcp/subflow.c:885 Code: 04 02 84 c0 74 06 0f 8e 91 00 00 00 41 0f b6 5e 48 31 ff 83 e3 18 89 de e8 37 ec 3d fe 84 db 0f 85 65 ff ff ff e8 fa ea 3d fe <0f> 0b e9 59 ff ff ff e8 ee ea 3d fe 48 89 ee 4c 89 ef e8 f3 77 ff RSP: 0018:ffff88811b2099b0 EFLAGS: 00010206 RAX: ffff888111197000 RBX: 0000000000000000 RCX: ffffffff82fbc609 RDX: 0000000000000100 RSI: ffffffff82fbc616 RDI: 0000000000000001 RBP: ffff8881111bc800 R08: ffff888111197000 R09: ffffed10222a82af R10: ffff888111541577 R11: ffffed10222a82ae R12: 1ffff11023641336 R13: ffff888111541000 R14: ffff88810fd4ca00 R15: ffff888111541570 tcp_child_process+0x754/0x920 net/ipv4/tcp_minisocks.c:841 tcp_v4_do_rcv+0x749/0x8b0 net/ipv4/tcp_ipv4.c:1642 tcp_v4_rcv+0x2666/0x2e60 net/ipv4/tcp_ipv4.c:1999 ip_protocol_deliver_rcu+0x29/0x1f0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:421 [inline] ip_local_deliver+0x2da/0x390 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:441 [inline] ip_rcv_finish net/ipv4/ip_input.c:428 [inline] ip_rcv_finish net/ipv4/ip_input.c:414 [inline] NF_HOOK include/linux/netfilter.h:421 [inline] ip_rcv+0xef/0x140 net/ipv4/ip_input.c:539 __netif_receive_skb_one_core+0x197/0x1e0 net/core/dev.c:5268 __netif_receive_skb+0x27/0x1c0 net/core/dev.c:5382 process_backlog+0x1e5/0x6d0 net/core/dev.c:6226 napi_poll net/core/dev.c:6671 [inline] net_rx_action+0x3e3/0xd70 net/core/dev.c:6739 __do_softirq+0x18c/0x634 kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 do_softirq.part.0+0x26/0x30 kernel/softirq.c:337 do_softirq arch/x86/include/asm/preempt.h:26 [inline] __local_bh_enable_ip+0x46/0x50 kernel/softirq.c:189 local_bh_enable include/linux/bottom_half.h:32 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:723 [inline] ip_finish_output2+0x78a/0x19c0 net/ipv4/ip_output.c:229 __ip_finish_output+0x471/0x720 net/ipv4/ip_output.c:306 dst_output include/net/dst.h:435 [inline] ip_local_out+0x181/0x1e0 net/ipv4/ip_output.c:125 __ip_queue_xmit+0x7a1/0x14e0 net/ipv4/ip_output.c:530 __tcp_transmit_skb+0x19dc/0x35e0 net/ipv4/tcp_output.c:1238 __tcp_send_ack.part.0+0x3c2/0x5b0 net/ipv4/tcp_output.c:3785 __tcp_send_ack net/ipv4/tcp_output.c:3791 [inline] tcp_send_ack+0x7d/0xa0 net/ipv4/tcp_output.c:3791 tcp_rcv_synsent_state_process net/ipv4/tcp_input.c:6040 [inline] tcp_rcv_state_process+0x36a4/0x49c2 net/ipv4/tcp_input.c:6209 tcp_v4_do_rcv+0x343/0x8b0 net/ipv4/tcp_ipv4.c:1651 sk_backlog_rcv include/net/sock.h:996 [inline] __release_sock+0x1ad/0x310 net/core/sock.c:2548 release_sock+0x54/0x1a0 net/core/sock.c:3064 inet_wait_for_connect net/ipv4/af_inet.c:594 [inline] __inet_stream_connect+0x57e/0xd50 net/ipv4/af_inet.c:686 inet_stream_connect+0x53/0xa0 net/ipv4/af_inet.c:725 mptcp_stream_connect+0x171/0x5f0 net/mptcp/protocol.c:1920 __sys_connect_file net/socket.c:1854 [inline] __sys_connect+0x267/0x2f0 net/socket.c:1871 __do_sys_connect net/socket.c:1882 [inline] __se_sys_connect net/socket.c:1879 [inline] __x64_sys_connect+0x6f/0xb0 net/socket.c:1879 do_syscall_64+0xb7/0x3d0 arch/x86/entry/common.c:295 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fb577d06469 Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ff 49 2b 00 f7 d8 64 89 01 48 RSP: 002b:00007fb5783d5dd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 000000000068bfa0 RCX: 00007fb577d06469 RDX: 000000000000004d RSI: 0000000020000040 RDI: 0000000000000003 RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000041427c R14: 00007fb5783d65c0 R15: 0000000000000003 Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/39 Reported-by: Christoph Paasch Fixes: e1ff9e82e2ea ("net: mptcp: improve fallback to TCP") Suggested-by: Paolo Abeni Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e1e19c76e267..9f7f3772c13c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -873,7 +873,7 @@ static void subflow_data_ready(struct sock *sk) struct mptcp_sock *msk; msk = mptcp_sk(parent); - if (inet_sk_state_load(sk) == TCP_LISTEN) { + if ((1 << inet_sk_state_load(sk)) & (TCPF_LISTEN | TCPF_CLOSE)) { set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; -- cgit From 49b020c1d236a36a4533e7db6d2604cb57ed4c51 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 29 Jun 2020 16:11:00 +0000 Subject: Bluetooth: Adding a configurable autoconnect timeout This patch adds a configurable LE autoconnect timeout. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 1 + net/bluetooth/hci_event.c | 2 +- net/bluetooth/mgmt_config.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7959b851cc63..e6bf3d9f9d7a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3569,6 +3569,7 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M; hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES; hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION; + hdev->def_le_autoconnect_timeout = HCI_LE_AUTOCONN_TIMEOUT; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e060fc9ebb18..03a0759f2fc2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5315,7 +5315,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, } conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW, - HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER, + hdev->def_le_autoconnect_timeout, HCI_ROLE_MASTER, direct_rpa); if (!IS_ERR(conn)) { /* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c index 8d01a8ff85e9..b30b571f8caf 100644 --- a/net/bluetooth/mgmt_config.c +++ b/net/bluetooth/mgmt_config.c @@ -17,6 +17,12 @@ { cpu_to_le16(hdev->_param_name_) } \ } +#define HDEV_PARAM_U16_JIFFIES_TO_MSECS(_param_code_, _param_name_) \ +{ \ + { cpu_to_le16(_param_code_), sizeof(__u16) }, \ + { cpu_to_le16(jiffies_to_msecs(hdev->_param_name_)) } \ +} + int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -59,6 +65,8 @@ int read_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, HDEV_PARAM_U16(0x0018, le_conn_max_interval), HDEV_PARAM_U16(0x0019, le_conn_latency), HDEV_PARAM_U16(0x001a, le_supv_timeout), + HDEV_PARAM_U16_JIFFIES_TO_MSECS(0x001b, + def_le_autoconnect_timeout), }; struct mgmt_rp_read_def_system_config *rp = (void *)params; @@ -129,6 +137,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, case 0x0018: case 0x0019: case 0x001a: + case 0x001b: if (len != sizeof(u16)) { bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d", len, sizeof(u16), type); @@ -238,6 +247,10 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data, case 0x0001a: hdev->le_supv_timeout = TLV_GET_LE16(buffer); break; + case 0x0001b: + hdev->def_le_autoconnect_timeout = + msecs_to_jiffies(TLV_GET_LE16(buffer)); + break; default: bt_dev_warn(hdev, "unsupported parameter %u", type); break; -- cgit From d4edda0f791fccf4cbb8a88566a8f2b1228faaee Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Mon, 29 Jun 2020 17:04:15 +0000 Subject: Bluetooth: use configured default params for active scans This patch fixes active scans to use the configured default parameters. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 116207009dde..68a2ec36e1c1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2767,8 +2767,9 @@ static int active_scan(struct hci_request *req, unsigned long opt) if (err < 0) own_addr_type = ADDR_LE_DEV_PUBLIC; - hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, DISCOV_LE_SCAN_WIN, - own_addr_type, filter_policy); + hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, + hdev->le_scan_window_discovery, own_addr_type, + filter_policy); return 0; } @@ -2815,18 +2816,18 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) * to do BR/EDR inquiry. */ hci_req_sync(hdev, interleaved_discov, - DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT, + hdev->le_scan_int_discovery * 2, HCI_CMD_TIMEOUT, status); break; } timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); - hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery, HCI_CMD_TIMEOUT, status); break; case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + hci_req_sync(hdev, active_scan, hdev->le_scan_int_discovery, HCI_CMD_TIMEOUT, status); break; default: -- cgit From b83764f9220a4a14525657466f299850bbc98de9 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Mon, 29 Jun 2020 20:15:00 -0700 Subject: Bluetooth: Fix kernel oops triggered by hci_adv_monitors_clear() This fixes the kernel oops by removing unnecessary background scan update from hci_adv_monitors_clear() which shouldn't invoke any work queue. The following test was performed. - Run "rmmod btusb" and verify that no kernel oops is triggered. Signed-off-by: Miao-chen Chou Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e6bf3d9f9d7a..6509f785dd14 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3005,8 +3005,6 @@ void hci_adv_monitors_clear(struct hci_dev *hdev) hci_free_adv_monitor(monitor); idr_destroy(&hdev->adv_monitors_idr); - - hci_update_background_scan(hdev); } void hci_free_adv_monitor(struct adv_monitor *monitor) -- cgit From 15d8ce05ebec37a0d701cde768bbf21349f2329d Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Tue, 7 Jul 2020 17:46:06 +0200 Subject: Bluetooth: le_simult_central_peripheral experimental feature This patch adds an le_simult_central_peripheral features which allows a clients to determine if the controller is able to support peripheral and central connections separately and at the same time. Signed-off-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 5e9b9728eeac..d29da80e38fe 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3753,12 +3753,19 @@ static const u8 debug_uuid[16] = { }; #endif +/* 671b10b5-42c0-4696-9227-eb28d1b049d6 */ +static const u8 simult_central_periph_uuid[16] = { + 0xd6, 0x49, 0xb0, 0xd1, 0x28, 0xeb, 0x27, 0x92, + 0x96, 0x46, 0xc0, 0x42, 0xb5, 0x10, 0x1b, 0x67, +}; + static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[42]; + char buf[44]; struct mgmt_rp_read_exp_features_info *rp = (void *)buf; u16 idx = 0; + u32 flags; bt_dev_dbg(hdev, "sock %p", sk); @@ -3766,7 +3773,7 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, #ifdef CONFIG_BT_FEATURE_DEBUG if (!hdev) { - u32 flags = bt_dbg_get() ? BIT(0) : 0; + flags = bt_dbg_get() ? BIT(0) : 0; memcpy(rp->features[idx].uuid, debug_uuid, 16); rp->features[idx].flags = cpu_to_le32(flags); @@ -3774,6 +3781,20 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, } #endif + if (hdev) { + if (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && + (hdev->le_states[4] & 0x08) && /* Central */ + (hdev->le_states[4] & 0x40) && /* Peripheral */ + (hdev->le_states[3] & 0x10)) /* Simultaneous */ + flags = BIT(0); + else + flags = 0; + + memcpy(rp->features[idx].uuid, simult_central_periph_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable -- cgit From b416268b7a819c0508ed0dc81461e513b110f2ac Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 7 Jul 2020 14:40:48 +0200 Subject: mptcp: use mptcp worker for path management We can re-use the existing work queue to handle path management instead of a dedicated work queue. Just move pm_worker to protocol.c, call it from the mptcp worker and get rid of the msk lock (already held). Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm.c | 44 +------------------------------------------- net/mptcp/protocol.c | 27 ++++++++++++++++++++++++++- net/mptcp/protocol.h | 3 --- 3 files changed, 27 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 7de09fdd42a3..a8ad20559aaa 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -10,8 +10,6 @@ #include #include "protocol.h" -static struct workqueue_struct *pm_wq; - /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, @@ -78,7 +76,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return false; msk->pm.status |= BIT(new_status); - if (queue_work(pm_wq, &msk->pm.work)) + if (schedule_work(&msk->work)) sock_hold((struct sock *)msk); return true; } @@ -181,35 +179,6 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, skc); } -static void pm_worker(struct work_struct *work) -{ - struct mptcp_pm_data *pm = container_of(work, struct mptcp_pm_data, - work); - struct mptcp_sock *msk = container_of(pm, struct mptcp_sock, pm); - struct sock *sk = (struct sock *)msk; - - lock_sock(sk); - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x", msk, pm->status); - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); - mptcp_pm_nl_add_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); - mptcp_pm_nl_fully_established(msk); - } - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); - mptcp_pm_nl_subflow_established(msk); - } - - spin_unlock_bh(&msk->pm.lock); - release_sock(sk); - sock_put(sk); -} - void mptcp_pm_data_init(struct mptcp_sock *msk) { msk->pm.add_addr_signaled = 0; @@ -223,22 +192,11 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) msk->pm.status = 0; spin_lock_init(&msk->pm.lock); - INIT_WORK(&msk->pm.work, pm_worker); mptcp_pm_nl_data_init(msk); } -void mptcp_pm_close(struct mptcp_sock *msk) -{ - if (cancel_work_sync(&msk->pm.work)) - sock_put((struct sock *)msk); -} - void __init mptcp_pm_init(void) { - pm_wq = alloc_workqueue("pm_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 8); - if (!pm_wq) - panic("Failed to allocate workqueue"); - mptcp_pm_nl_init(); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ab060e30038..dbe43e0cd734 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1214,6 +1214,29 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } +static void pm_work(struct mptcp_sock *msk) +{ + struct mptcp_pm_data *pm = &msk->pm; + + spin_lock_bh(&msk->pm.lock); + + pr_debug("msk=%p status=%x", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); + mptcp_pm_nl_add_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); + mptcp_pm_nl_fully_established(msk); + } + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); + mptcp_pm_nl_subflow_established(msk); + } + + spin_unlock_bh(&msk->pm.lock); +} + static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -1230,6 +1253,9 @@ static void mptcp_worker(struct work_struct *work) __mptcp_flush_join_list(msk); __mptcp_move_skbs(msk); + if (msk->pm.status) + pm_work(msk); + if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); @@ -1420,7 +1446,6 @@ static void mptcp_close(struct sock *sk, long timeout) } mptcp_cancel_work(sk); - mptcp_pm_close(msk); __skb_queue_purge(&sk->sk_receive_queue); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a6412ff0fddb..39bfec3f1586 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -174,8 +174,6 @@ struct mptcp_pm_data { u8 local_addr_max; u8 subflows_max; u8 status; - - struct work_struct work; }; struct mptcp_data_frag { @@ -412,7 +410,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); -void mptcp_pm_close(struct mptcp_sock *msk); void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); -- cgit From 4895d7808e7030c831f2ef83a3cc6ec0d46c30b1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Jul 2020 21:27:56 -0700 Subject: net: ethtool: Introduce ethtool_phy_ops In order to decouple ethtool from its PHY library dependency, define an ethtool_phy_ops singleton which can be overriden by the PHY library when it loads with an appropriate set of function pointers. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/ethtool/common.c | 11 +++++++++++ net/ethtool/common.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/ethtool/common.c b/net/ethtool/common.c index aaecfc916a4d..ce4dbae5a943 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -2,6 +2,7 @@ #include #include +#include #include "common.h" @@ -373,3 +374,13 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) return 0; } + +const struct ethtool_phy_ops *ethtool_phy_ops; + +void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) +{ + rtnl_lock(); + ethtool_phy_ops = ops; + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index a62f68ccc43a..b83bef38368c 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -37,4 +37,6 @@ bool convert_legacy_settings_to_link_ksettings( int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +extern const struct ethtool_phy_ops *ethtool_phy_ops; + #endif /* _ETHTOOL_COMMON_H */ -- cgit From f3631ab08eeb67ad023ba1b37004ecba0158f93f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Jul 2020 21:27:58 -0700 Subject: net: ethtool: Remove PHYLIB direct dependency Now that we have introduced ethtool_phy_ops and the PHY library dynamically registers its operations with that function pointer, we can remove the direct PHYLIB dependency in favor of using dynamic operations. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/Kconfig | 1 - net/ethtool/cabletest.c | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index d1672280d6a4..3831206977a1 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -455,7 +455,6 @@ config FAILOVER config ETHTOOL_NETLINK bool "Netlink interface for ethtool" default y - depends on PHYLIB=y || PHYLIB=n help An alternative userspace interface for ethtool based on generic netlink. It provides better extensibility and some new features, diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 7194956aa09e..888f6e101f34 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -58,6 +58,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ETHTOOL_A_CABLE_TEST_MAX + 1]; struct ethnl_req_info req_info = {}; + const struct ethtool_phy_ops *ops; struct net_device *dev; int ret; @@ -81,11 +82,17 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) } rtnl_lock(); + ops = ethtool_phy_ops; + if (!ops || !ops->start_cable_test) { + ret = -EOPNOTSUPP; + goto out_rtnl; + } + ret = ethnl_ops_begin(dev); if (ret < 0) goto out_rtnl; - ret = phy_start_cable_test(dev->phydev, info->extack); + ret = ops->start_cable_test(dev->phydev, info->extack); ethnl_ops_complete(dev); @@ -308,6 +315,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1]; struct ethnl_req_info req_info = {}; + const struct ethtool_phy_ops *ops; struct phy_tdr_config cfg; struct net_device *dev; int ret; @@ -337,11 +345,17 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) goto out_dev_put; rtnl_lock(); + ops = ethtool_phy_ops; + if (!ops || !ops->start_cable_test_tdr) { + ret = -EOPNOTSUPP; + goto out_rtnl; + } + ret = ethnl_ops_begin(dev); if (ret < 0) goto out_rtnl; - ret = phy_start_cable_test_tdr(dev->phydev, info->extack, &cfg); + ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg); ethnl_ops_complete(dev); -- cgit From 535094a0c9c4908a31f3158a647e203ceeecf277 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 6 Jul 2020 22:50:30 +0200 Subject: Replace HTTP links with HTTPS ones: X.25 network layer Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- net/x25/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/Kconfig b/net/x25/Kconfig index e3ed23245a82..68729aa3a5d5 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -17,7 +17,7 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at and + You can read more about X.25 at and . Information about X.25 for Linux is contained in the files and -- cgit From 964201de695b8afca80652f048a95dc2cb68fcb7 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Jul 2020 12:21:38 -0500 Subject: net/sched: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/act_csum.c | 3 ++- net/sched/act_ct.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_ets.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- 14 files changed, 15 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index cb8608f0a77a..9035355e867f 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -598,7 +598,8 @@ again: if (!tcf_csum_ipv6(skb, update_flags)) goto drop; break; - case cpu_to_be16(ETH_P_8021AD): /* fall through */ + case cpu_to_be16(ETH_P_8021AD): + fallthrough; case cpu_to_be16(ETH_P_8021Q): if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) { protocol = skb->protocol; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 1b9c6d4a1b6b..fadfd6b0033b 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -783,7 +783,7 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } } /* Non-ICMP, fall thru to initialize if needed. */ - /* fall through */ + fallthrough; case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 052d4a1af69a..9e16fdf47fe7 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,7 +250,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 0d5c9a8ec61d..ad14df2ecf3a 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -324,7 +324,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 373dc5855d4e..1af86f30b18e 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -397,7 +397,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6bf979f95509..bca016ffc069 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -99,7 +99,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return 0; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index a27a250ab8f9..741840b9994f 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -102,7 +102,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return 0; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7f6670044f0a..13ba8648bb63 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1136,7 +1136,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 52fc513688b1..b07f29059f47 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -239,7 +239,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 648611f5c105..56bdc4bcdc63 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -43,7 +43,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a3e187f2603c..46b7ce81c6e3 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -46,7 +46,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ede854516825..3cfe6262eb00 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -699,7 +699,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return NULL; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index d2a6e78262bb..356f6d1d30db 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -265,7 +265,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return false; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 46cdefd69e44..a1314510dc69 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -186,7 +186,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - /* fall through */ + fallthrough; case TC_ACT_SHOT: return 0; } -- cgit From f5836749c9c04a10decd2742845ad4870965fdef Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 6 Jul 2020 16:01:25 -0700 Subject: bpf: Add BPF_CGROUP_INET_SOCK_RELEASE hook Sometimes it's handy to know when the socket gets freed. In particular, we'd like to try to use a smarter allocation of ports for bpf_bind and explore the possibility of limiting the number of SOCK_DGRAM sockets the process can have. Implement BPF_CGROUP_INET_SOCK_RELEASE hook that triggers on inet socket release. It triggers only for userspace sockets (not in-kernel ones) and therefore has the same semantics as the existing BPF_CGROUP_INET_SOCK_CREATE. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200706230128.4073544-2-sdf@google.com --- net/core/filter.c | 1 + net/ipv4/af_inet.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index c5e696e6c315..ddcc0d6209e1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6890,6 +6890,7 @@ static bool __sock_filter_check_attach_type(int off, case offsetof(struct bpf_sock, priority): switch (attach_type) { case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: goto full_access; default: return false; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ea6ed6d487ed..ff141d630bdf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -411,6 +411,9 @@ int inet_release(struct socket *sock) if (sk) { long timeout; + if (!sk->sk_kern_sock) + BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk); + /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); -- cgit From 51b64c476a5ddc66d3459f14e98e5de9211e9e24 Mon Sep 17 00:00:00 2001 From: Miao-chen Chou Date: Tue, 7 Jul 2020 15:52:28 -0700 Subject: Bluetooth: Use whitelist for scan policy when suspending Even with one advertisement monitor in place, the scan policy should use the whitelist while the system is going to suspend to prevent waking by random advertisement. The following test was performed. - With a paired device, register one advertisement monitor, suspend the system and verify that the host was not awaken by random advertisements. Signed-off-by: Miao-chen Chou Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 68a2ec36e1c1..770b93758112 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -800,9 +800,10 @@ static u8 update_white_list(struct hci_request *req) /* Once the controller offloading of advertisement monitor is in place, * the if condition should include the support of MSFT extension - * support. + * support. If suspend is ongoing, whitelist should be the default to + * prevent waking by random advertisements. */ - if (!idr_is_empty(&hdev->adv_monitors_idr)) + if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended) return 0x00; /* Select filter policy to use white list */ -- cgit From 17809516a03a045565ad6a80e6241754615871ac Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 8 Jul 2020 09:46:25 -0700 Subject: net: phy: Uninline PHY ethtool statistics operations Now that we have moved the PHY ethtool statistics to be dynamically registered, we no longer need to inline those for ethtool. This used to be done to avoid cross symbol referencing and allow ethtool to be decoupled from PHYLIB entirely. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ethtool/ioctl.c | 23 +++++++++++++++-------- net/ethtool/strset.c | 11 +++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 83f22196d64c..441794e0034f 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -135,6 +135,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) static int __ethtool_get_sset_count(struct net_device *dev, int sset) { + const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; const struct ethtool_ops *ops = dev->ethtool_ops; if (sset == ETH_SS_FEATURES) @@ -150,8 +151,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(phy_tunable_strings); if (sset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - return phy_ethtool_get_sset_count(dev->phydev); + !ops->get_ethtool_phy_stats && + phy_ops && phy_ops->get_sset_count) + return phy_ops->get_sset_count(dev->phydev); if (sset == ETH_SS_LINK_MODES) return __ETHTOOL_LINK_MODE_MASK_NBITS; @@ -165,6 +167,7 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) static void __ethtool_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; const struct ethtool_ops *ops = dev->ethtool_ops; if (stringset == ETH_SS_FEATURES) @@ -178,8 +181,9 @@ static void __ethtool_get_strings(struct net_device *dev, else if (stringset == ETH_SS_PHY_TUNABLES) memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings)); else if (stringset == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - phy_ethtool_get_strings(dev->phydev, data); + !ops->get_ethtool_phy_stats && phy_ops && + phy_ops->get_strings) + phy_ops->get_strings(dev->phydev, data); else if (stringset == ETH_SS_LINK_MODES) memcpy(data, link_mode_names, __ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN); @@ -1929,6 +1933,7 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) { + const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; struct ethtool_stats stats; @@ -1938,8 +1943,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count)) return -EOPNOTSUPP; - if (dev->phydev && !ops->get_ethtool_phy_stats) - n_stats = phy_ethtool_get_sset_count(dev->phydev); + if (dev->phydev && !ops->get_ethtool_phy_stats && + phy_ops && phy_ops->get_sset_count) + n_stats = phy_ops->get_sset_count(dev->phydev); else n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); if (n_stats < 0) @@ -1958,8 +1964,9 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - if (dev->phydev && !ops->get_ethtool_phy_stats) { - ret = phy_ethtool_get_stats(dev->phydev, &stats, data); + if (dev->phydev && !ops->get_ethtool_phy_stats && + phy_ops && phy_ops->get_stats) { + ret = phy_ops->get_stats(dev->phydev, &stats, data); if (ret < 0) goto out; } else { diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 0eed4e4909ab..773634b6b048 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -209,13 +209,15 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base) static int strset_prepare_set(struct strset_info *info, struct net_device *dev, unsigned int id, bool counts_only) { + const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; const struct ethtool_ops *ops = dev->ethtool_ops; void *strings; int count, ret; if (id == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - ret = phy_ethtool_get_sset_count(dev->phydev); + !ops->get_ethtool_phy_stats && phy_ops && + phy_ops->get_sset_count) + ret = phy_ops->get_sset_count(dev->phydev); else if (ops->get_sset_count && ops->get_strings) ret = ops->get_sset_count(dev, id); else @@ -231,8 +233,9 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev, if (!strings) return -ENOMEM; if (id == ETH_SS_PHY_STATS && dev->phydev && - !ops->get_ethtool_phy_stats) - phy_ethtool_get_strings(dev->phydev, strings); + !ops->get_ethtool_phy_stats && phy_ops && + phy_ops->get_strings) + phy_ops->get_strings(dev->phydev, strings); else ops->get_strings(dev, id, strings); info->strings = strings; -- cgit From 065e0d42a0a728d7f6c2aec7c9f3e5dc7b715394 Mon Sep 17 00:00:00 2001 From: Meir Lichtinger Date: Mon, 6 Jul 2020 20:42:32 -0700 Subject: ethtool: Add support for 100Gbps per lane link modes Define 100G, 200G and 400G link modes using 100Gbps per lane LR, ER and FR are defined as a single link mode because they are using same technology and by design are fully interoperable. EEPROM content indicates if the module is LR, ER, or FR, and the user space ethtool decoder is planned to support decoding these modes in the EEPROM. Signed-off-by: Meir Lichtinger CC: Andrew Lunn Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- net/ethtool/common.c | 15 +++++++++++++++ net/ethtool/linkmodes.c | 15 +++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'net') diff --git a/net/ethtool/common.c b/net/ethtool/common.c index ce4dbae5a943..c54166713797 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -176,6 +176,21 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, DR8, Full), __DEFINE_LINK_MODE_NAME(400000, CR8, Full), __DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"), + __DEFINE_LINK_MODE_NAME(100000, KR, Full), + __DEFINE_LINK_MODE_NAME(100000, SR, Full), + __DEFINE_LINK_MODE_NAME(100000, LR_ER_FR, Full), + __DEFINE_LINK_MODE_NAME(100000, DR, Full), + __DEFINE_LINK_MODE_NAME(100000, CR, Full), + __DEFINE_LINK_MODE_NAME(200000, KR2, Full), + __DEFINE_LINK_MODE_NAME(200000, SR2, Full), + __DEFINE_LINK_MODE_NAME(200000, LR2_ER2_FR2, Full), + __DEFINE_LINK_MODE_NAME(200000, DR2, Full), + __DEFINE_LINK_MODE_NAME(200000, CR2, Full), + __DEFINE_LINK_MODE_NAME(400000, KR4, Full), + __DEFINE_LINK_MODE_NAME(400000, SR4, Full), + __DEFINE_LINK_MODE_NAME(400000, LR4_ER4_FR4, Full), + __DEFINE_LINK_MODE_NAME(400000, DR4, Full), + __DEFINE_LINK_MODE_NAME(400000, CR4, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index fd4f3e58c6f6..317a93129551 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -257,6 +257,21 @@ static const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), + __DEFINE_LINK_MODE_PARAMS(100000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), }; static const struct nla_policy -- cgit From efd7fe68f0c6c9649757bf80cbc382fd21e764c9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jul 2020 14:25:36 +0200 Subject: net: dsa: tag_rtl4_a: Implement Realtek 4 byte A tag This implements the known parts of the Realtek 4 byte tag protocol version 0xA, as found in the RTL8366RB DSA switch. It is designated as protocol version 0xA as a different Realtek 4 byte tag format with protocol version 0x9 is known to exist in the Realtek RTL8306 chips. The tag and switch chip lacks public documentation, so the tag format has been reverse-engineered from packet dumps. As only ingress traffic has been available for analysis an egress tag has not been possible to develop (even using educated guesses about bit fields) so this is as far as it gets. It is not known if the switch even supports egress tagging. Excessive attempts to figure out the egress tag format was made. When nothing else worked, I just tried all bit combinations with 0xannp where a is protocol and p is port. I looped through all values several times trying to get a response from ping, without any positive result. Using just these ingress tags however, the switch functionality is vastly improved and the packets find their way into the destination port without any tricky VLAN configuration. On the D-Link DIR-685 the LAN ports now come up and respond to ping without any command line configuration so this is a real improvement for users. Egress packets need to be restricted to the proper target ports using VLAN, which the RTL8366RB DSA switch driver already sets up. Cc: DENG Qingfang Cc: Mauri Sandberg Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: David S. Miller --- net/dsa/Kconfig | 7 +++ net/dsa/Makefile | 1 + net/dsa/tag_rtl4_a.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 net/dsa/tag_rtl4_a.c (limited to 'net') diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index d5bc6ac599ef..1f9b9b11008c 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -86,6 +86,13 @@ config NET_DSA_TAG_KSZ Say Y if you want to enable support for tagging frames for the Microchip 8795/9477/9893 families of switches. +config NET_DSA_TAG_RTL4_A + tristate "Tag driver for Realtek 4 byte protocol A tags" + help + Say Y or M if you want to enable support for tagging frames for the + Realtek switches with 4 byte protocol A tags, sich as found in + the Realtek RTL8366RB. + config NET_DSA_TAG_OCELOT tristate "Tag driver for Ocelot family of switches" select PACKING diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 108486cfdeef..4f47b2025ff5 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o +obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c new file mode 100644 index 000000000000..7b63010fa87b --- /dev/null +++ b/net/dsa/tag_rtl4_a.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handler for Realtek 4 byte DSA switch tags + * Currently only supports protocol "A" found in RTL8366RB + * Copyright (c) 2020 Linus Walleij + * + * This "proprietary tag" header looks like so: + * + * ------------------------------------------------- + * | MAC DA | MAC SA | 0x8899 | 2 bytes tag | Type | + * ------------------------------------------------- + * + * The 2 bytes tag form a 16 bit big endian word. The exact + * meaning has been guessed from packet dumps from ingress + * frames, as no working egress traffic has been available + * we do not know the format of the egress tags or if they + * are even supported. + */ + +#include +#include + +#include "dsa_priv.h" + +#define RTL4_A_HDR_LEN 4 +#define RTL4_A_ETHERTYPE 0x8899 +#define RTL4_A_PROTOCOL_SHIFT 12 +/* + * 0x1 = Realtek Remote Control protocol (RRCP) + * 0x2/0x3 seems to be used for loopback testing + * 0x9 = RTL8306 DSA protocol + * 0xa = RTL8366RB DSA protocol + */ +#define RTL4_A_PROTOCOL_RTL8366RB 0xa + +static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* + * Just let it pass thru, we don't know if it is possible + * to tag a frame with the 0x8899 ethertype and direct it + * to a specific port, all attempts at reverse-engineering have + * ended up with the frames getting dropped. + * + * The VLAN set-up needs to restrict the frames to the right port. + * + * If you have documentation on the tagging format for RTL8366RB + * (tag type A) then please contribute. + */ + return skb; +} + +static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt) +{ + u16 protport; + __be16 *p; + u16 etype; + u8 *tag; + u8 prot; + u8 port; + + if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN))) + return NULL; + + /* The RTL4 header has its own custom Ethertype 0x8899 and that + * starts right at the beginning of the packet, after the src + * ethernet addr. Apparantly skb->data always points 2 bytes in, + * behind the Ethertype. + */ + tag = skb->data - 2; + p = (__be16 *)tag; + etype = ntohs(*p); + if (etype != RTL4_A_ETHERTYPE) { + /* Not custom, just pass through */ + netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); + return skb; + } + p = (__be16 *)(tag + 2); + protport = ntohs(*p); + /* The 4 upper bits are the protocol */ + prot = (protport >> RTL4_A_PROTOCOL_SHIFT) & 0x0f; + if (prot != RTL4_A_PROTOCOL_RTL8366RB) { + netdev_err(dev, "unknown realtek protocol 0x%01x\n", prot); + return NULL; + } + port = protport & 0xff; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { + netdev_dbg(dev, "could not find slave for port %d\n", port); + return NULL; + } + + /* Remove RTL4 tag and recalculate checksum */ + skb_pull_rcsum(skb, RTL4_A_HDR_LEN); + + /* Move ethernet DA and SA in front of the data */ + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - RTL4_A_HDR_LEN, + 2 * ETH_ALEN); + + skb->offload_fwd_mark = 1; + + return skb; +} + +static int rtl4a_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = RTL4_A_HDR_LEN; + /* Skip past the tag and fetch the encapsulated Ethertype */ + *proto = ((__be16 *)skb->data)[1]; + + return 0; +} + +static const struct dsa_device_ops rtl4a_netdev_ops = { + .name = "rtl4a", + .proto = DSA_TAG_PROTO_RTL4_A, + .xmit = rtl4a_tag_xmit, + .rcv = rtl4a_tag_rcv, + .flow_dissect = rtl4a_tag_flow_dissect, + .overhead = RTL4_A_HDR_LEN, +}; +module_dsa_tag_driver(rtl4a_netdev_ops); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A); -- cgit From 1475ee0ac9a16dd5df23ca8abe1039eb6086eb66 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:29 +0800 Subject: xfrm: add is_ipip to struct xfrm_input_afinfo This patch is to add a new member is_ipip to struct xfrm_input_afinfo, to allow another group family of callback functions to be registered with is_ipip set. This will be used for doing a callback for struct xfrm(6)_tunnel of ipip/ipv6 tunnels in xfrm_input() by calling xfrm_rcv_cb(), which is needed by ipip/ipv6 tunnels' support in ip(6)_vti and xfrm interface in the next patches. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index bd984ff17c2d..37456d022cfa 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -42,7 +42,7 @@ struct xfrm_trans_cb { #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); -static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; +static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[2][AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; @@ -53,14 +53,14 @@ int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo))) + if (WARN_ON(afinfo->family > AF_INET6)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_input_afinfo_lock); - if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) + if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) err = -EEXIST; else - rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo); + rcu_assign_pointer(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], afinfo); spin_unlock_bh(&xfrm_input_afinfo_lock); return err; } @@ -71,11 +71,11 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) int err = 0; spin_lock_bh(&xfrm_input_afinfo_lock); - if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) + if (likely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family])) { + if (unlikely(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family] != afinfo)) err = -EINVAL; else - RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL); + RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->is_ipip][afinfo->family], NULL); } spin_unlock_bh(&xfrm_input_afinfo_lock); synchronize_rcu(); @@ -83,15 +83,15 @@ int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_unregister_afinfo); -static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(u8 family, bool is_ipip) { const struct xfrm_input_afinfo *afinfo; - if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo))) + if (WARN_ON_ONCE(family > AF_INET6)) return NULL; rcu_read_lock(); - afinfo = rcu_dereference(xfrm_input_afinfo[family]); + afinfo = rcu_dereference(xfrm_input_afinfo[is_ipip][family]); if (unlikely(!afinfo)) rcu_read_unlock(); return afinfo; @@ -100,9 +100,11 @@ static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, int err) { + bool is_ipip = (protocol == IPPROTO_IPIP || protocol == IPPROTO_IPV6); + const struct xfrm_input_afinfo *afinfo; int ret; - const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + afinfo = xfrm_input_get_afinfo(family, is_ipip); if (!afinfo) return -EAFNOSUPPORT; -- cgit From 6df2db5d37ba3df8c80d90c15f1e20480be43f75 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:30 +0800 Subject: tunnel4: add cb_handler to struct xfrm_tunnel This patch is to register a callback function tunnel4_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel4 and tunnel64. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel4_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/tunnel4.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'net') diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index c4b2ccbeba04..e44aaf41a138 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -110,6 +110,33 @@ drop: return 0; } +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int tunnel4_rcv_cb(struct sk_buff *skb, u8 proto, int err) +{ + struct xfrm_tunnel __rcu *head; + struct xfrm_tunnel *handler; + int ret; + + head = (proto == IPPROTO_IPIP) ? tunnel4_handlers : tunnel64_handlers; + + for_each_tunnel_rcu(head, handler) { + if (handler->cb_handler) { + ret = handler->cb_handler(skb, err); + if (ret <= 0) + return ret; + } + } + + return 0; +} + +static const struct xfrm_input_afinfo tunnel4_input_afinfo = { + .family = AF_INET, + .is_ipip = true, + .callback = tunnel4_rcv_cb, +}; +#endif + #if IS_ENABLED(CONFIG_IPV6) static int tunnel64_rcv(struct sk_buff *skb) { @@ -230,6 +257,18 @@ static int __init tunnel4_init(void) #endif goto err; } +#endif +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + if (xfrm_input_register_afinfo(&tunnel4_input_afinfo)) { + inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); +#if IS_ENABLED(CONFIG_IPV6) + inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6); +#endif +#if IS_ENABLED(CONFIG_MPLS) + inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS); +#endif + goto err; + } #endif return 0; @@ -240,6 +279,10 @@ err: static void __exit tunnel4_fini(void) { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + if (xfrm_input_unregister_afinfo(&tunnel4_input_afinfo)) + pr_err("tunnel4 close: can't remove input afinfo\n"); +#endif #if IS_ENABLED(CONFIG_MPLS) if (inet_del_protocol(&tunnelmpls4_protocol, IPPROTO_MPLS)) pr_err("tunnelmpls4 close: can't remove protocol\n"); -- cgit From 86afc7031826147407e96412668d343e0f1bd6fd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:31 +0800 Subject: tunnel6: add tunnel6_input_afinfo for ipip and ipv6 tunnels This patch is to register a callback function tunnel6_rcv_cb with is_ipip set in a xfrm_input_afinfo object for tunnel6 and tunnel46. It will be called by xfrm_rcv_cb() from xfrm_input() when family is AF_INET6 and proto is IPPROTO_IPIP or IPPROTO_IPV6. v1->v2: - Fix a sparse warning caused by the missing "__rcu", as Jakub noticed. - Handle the err returned by xfrm_input_register_afinfo() in tunnel6_init/fini(), as Sabrina noticed. v2->v3: - Add "#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL)" to fix the build error when xfrm is disabled, reported by kbuild test robot Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/tunnel6.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'net') diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 06c02ebe6b9b..00e8d8b1c9a7 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -155,6 +155,33 @@ drop: return 0; } +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int tunnel6_rcv_cb(struct sk_buff *skb, u8 proto, int err) +{ + struct xfrm6_tunnel __rcu *head; + struct xfrm6_tunnel *handler; + int ret; + + head = (proto == IPPROTO_IPV6) ? tunnel6_handlers : tunnel46_handlers; + + for_each_tunnel_rcu(head, handler) { + if (handler->cb_handler) { + ret = handler->cb_handler(skb, err); + if (ret <= 0) + return ret; + } + } + + return 0; +} + +static const struct xfrm_input_afinfo tunnel6_input_afinfo = { + .family = AF_INET6, + .is_ipip = true, + .callback = tunnel6_rcv_cb, +}; +#endif + static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -245,11 +272,25 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); return -EAGAIN; } +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + if (xfrm_input_register_afinfo(&tunnel6_input_afinfo)) { + pr_err("%s: can't add input afinfo\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + if (xfrm6_tunnel_mpls_supported()) + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS); + return -EAGAIN; + } +#endif return 0; } static void __exit tunnel6_fini(void) { +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + if (xfrm_input_unregister_afinfo(&tunnel6_input_afinfo)) + pr_err("%s: can't remove input afinfo\n", __func__); +#endif if (inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP)) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) -- cgit From 87e66b9682d7067eb7db08040dae36b608a4d971 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:32 +0800 Subject: ip_vti: support IPIP tunnel processing with .cb_handler With tunnel4_input_afinfo added, IPIP tunnel processing in ip_vti can be easily done with .cb_handler. So replace the processing by calling ip_tunnel_rcv() with it. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 51 +++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1d9c8cff5ac3..68177f065117 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -91,32 +91,6 @@ static int vti_rcv_proto(struct sk_buff *skb) return vti_rcv(skb, 0, false); } -static int vti_rcv_tunnel(struct sk_buff *skb) -{ - struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id); - const struct iphdr *iph = ip_hdr(skb); - struct ip_tunnel *tunnel; - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); - if (tunnel) { - struct tnl_ptk_info tpi = { - .proto = htons(ETH_P_IP), - }; - - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - if (iptunnel_pull_header(skb, 0, tpi.proto, false)) - goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false); - } - - return -EINVAL; -drop: - kfree_skb(skb); - return 0; -} - static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -495,11 +469,22 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; -static struct xfrm_tunnel ipip_handler __read_mostly = { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int vti_rcv_tunnel(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr, 0, false); +} + +static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .handler = vti_rcv_tunnel, + .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 0, }; +#endif static int __net_init vti_init_net(struct net *net) { @@ -669,10 +654,12 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) msg = "ipip tunnel"; - err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_failed; +#endif msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); @@ -682,8 +669,10 @@ static int __init vti_init(void) return err; rtnl_link_failed: - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); xfrm_tunnel_failed: +#endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); @@ -699,7 +688,9 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); - xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); +#endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); -- cgit From e6ce64570f2451684b4f9bcbaee6c40c4a7dff82 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:33 +0800 Subject: ip_vti: support IPIP6 tunnel processing For IPIP6 tunnel processing, the functions called will be the same as that for IPIP tunnel's. So reuse it and register it with family == AF_INET6. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 68177f065117..c0b97b8f6fbd 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -658,7 +658,12 @@ static int __init vti_init(void) msg = "ipip tunnel"; err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET); if (err < 0) - goto xfrm_tunnel_failed; + goto xfrm_tunnel_ipip_failed; +#if IS_ENABLED(CONFIG_IPV6) + err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipip6_failed; +#endif #endif msg = "netlink interface"; @@ -670,8 +675,12 @@ static int __init vti_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_ENABLED(CONFIG_IPV6) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); +xfrm_tunnel_ipip6_failed: +#endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); -xfrm_tunnel_failed: +xfrm_tunnel_ipip_failed: #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: @@ -689,6 +698,9 @@ static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_ENABLED(CONFIG_IPV6) + xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); +#endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); -- cgit From 08622869ed3f167db9b2250ab1bb055f55293401 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:34 +0800 Subject: ip6_vti: support IP6IP6 tunnel processing with .cb_handler Similar to IPIP tunnel's processing, this patch is to support IP6IP6 tunnel processing with .cb_handler. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET6_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1147f647b9a0..39efe41f7b48 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1218,6 +1218,26 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int vti6_rcv_tunnel(struct sk_buff *skb) +{ + const xfrm_address_t *saddr; + __be32 spi; + + saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; + spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); + + return vti6_input_proto(skb, IPPROTO_IPV6, spi, 0); +} + +static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = { + .handler = vti6_rcv_tunnel, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 0, +}; +#endif + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1243,6 +1263,12 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + msg = "ipv6 tunnel"; + err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); + if (err < 0) + goto vti_tunnel_failed; +#endif msg = "netlink interface"; err = rtnl_link_register(&vti6_link_ops); @@ -1252,6 +1278,10 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); +vti_tunnel_failed: +#endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); @@ -1270,6 +1300,9 @@ pernet_dev_failed: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); +#endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); -- cgit From 2ab110cbb0c0cb05c64f37f42b78f5bc11699b0e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:35 +0800 Subject: ip6_vti: support IP6IP tunnel processing For IP6IP tunnel processing, the functions called will be the same as that for IP6IP6 tunnel's. So reuse it and register it with family == AF_INET. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 39efe41f7b48..dfa93bc857d2 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1267,7 +1267,10 @@ static int __init vti6_tunnel_init(void) msg = "ipv6 tunnel"; err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) - goto vti_tunnel_failed; + goto vti_tunnel_ipv6_failed; + err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET); + if (err < 0) + goto vti_tunnel_ip6ip_failed; #endif msg = "netlink interface"; @@ -1279,8 +1282,10 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); +vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); -vti_tunnel_failed: +vti_tunnel_ipv6_failed: #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: @@ -1301,6 +1306,7 @@ static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); -- cgit From d5a7a5057387d79b91a6e2fd78a76ccd53f91e6c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:36 +0800 Subject: ipcomp: assign if_id to child tunnel from parent tunnel The child tunnel if_id will be used for xfrm interface's lookup when processing the IP(6)IP(6) packets in the next patches. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ipcomp.c | 1 + net/ipv6/ipcomp6.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59bfa3825810..b42683212c65 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -72,6 +72,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.flags = x->props.flags; t->props.extra_flags = x->props.extra_flags; memcpy(&t->mark, &x->mark, sizeof(t->mark)); + t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 99668bfebd85..daef890460b7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -91,6 +91,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); + t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; -- cgit From d7b360c2869f9ce2418510d14baf0f9696fcf1e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:37 +0800 Subject: xfrm: interface: support IP6IP6 and IP6IP tunnels processing with .cb_handler Similar to ip6_vti, IP6IP6 and IP6IP tunnels processing can easily be done with .cb_handler for xfrm interface. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET6_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index c407ecbc5d46..b9ef496d3d7c 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -798,6 +798,26 @@ static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .priority = 10, }; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +static int xfrmi6_rcv_tunnel(struct sk_buff *skb) +{ + const xfrm_address_t *saddr; + __be32 spi; + + saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; + spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); + + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); +} + +static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { + .handler = xfrmi6_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = -1, +}; +#endif + static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { .handler = xfrm4_rcv, .input_handler = xfrm_input, @@ -866,9 +886,23 @@ static int __init xfrmi6_init(void) err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipv6_failed; + err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET); + if (err < 0) + goto xfrm_tunnel_ip6ip_failed; +#endif return 0; +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +xfrm_tunnel_ip6ip_failed: + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); +xfrm_tunnel_ipv6_failed: + xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); +#endif xfrm_proto_comp_failed: xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -879,6 +913,10 @@ xfrm_proto_esp_failed: static void xfrmi6_fini(void) { +#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); +#endif xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); -- cgit From da9bbf0598c9e66b8a46ceabaa6172596795acf2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 6 Jul 2020 20:01:38 +0800 Subject: xfrm: interface: support IPIP and IPIP6 tunnels processing with .cb_handler Similar to ip_vti, IPIP and IPIP6 tunnels processing can easily be done with .cb_handler for xfrm interface. v1->v2: - no change. v2-v3: - enable it only when CONFIG_INET_XFRM_TUNNEL is defined, to fix the build error, reported by kbuild test robot. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index b9ef496d3d7c..a79eb49a4e0d 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -842,6 +842,20 @@ static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { .priority = 10, }; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +static int xfrmi4_rcv_tunnel(struct sk_buff *skb) +{ + return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); +} + +static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { + .handler = xfrmi4_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = -1, +}; +#endif + static int __init xfrmi4_init(void) { int err; @@ -855,9 +869,23 @@ static int __init xfrmi4_init(void) err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); + if (err < 0) + goto xfrm_tunnel_ipip_failed; + err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET6); + if (err < 0) + goto xfrm_tunnel_ipip6_failed; +#endif return 0; +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +xfrm_tunnel_ipip6_failed: + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); +xfrm_tunnel_ipip_failed: + xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); +#endif xfrm_proto_comp_failed: xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@ -868,6 +896,10 @@ xfrm_proto_esp_failed: static void xfrmi4_fini(void) { +#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); +#endif xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); -- cgit From 3f935c75eb52dd968351dba824adf466fb9c9429 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:39 +0200 Subject: inet_diag: support for wider protocol numbers After commit bf9765145b85 ("sock: Make sk_protocol a 16-bit value") the current size of 'sdiag_protocol' is not sufficient to represent the possible protocol values. This change introduces a new inet diag request attribute to let user space specify the relevant protocol number using u32 values. The attribute is parsed by inet diag core on get/dump command and the extended protocol value, if available, is preferred to 'sdiag_protocol' to lookup the diag handler. The parse attributed are exposed to all the diag handlers via the cb->data. Note that inet_diag_dump_one_icsk() is left unmodified, as it will not be used by protocol using the extended attribute. Suggested-by: David S. Miller Co-developed-by: Christoph Paasch Signed-off-by: Christoph Paasch Acked-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/sock.c | 1 + net/ipv4/inet_diag.c | 65 ++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index f5b5fdd61c88..de26fe4ea19f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3566,6 +3566,7 @@ int sock_load_diag_module(int family, int protocol) #ifdef CONFIG_INET if (family == AF_INET && protocol != IPPROTO_RAW && + protocol < MAX_INET_PROTOS && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 125f4f8a36b4..4a98dd736270 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -52,6 +52,11 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { + if (proto < 0 || proto >= IPPROTO_MAX) { + mutex_lock(&inet_diag_table_mutex); + return ERR_PTR(-ENOENT); + } + if (!inet_diag_table[proto]) sock_load_diag_module(AF_INET, proto); @@ -181,6 +186,28 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); +static void inet_diag_parse_attrs(const struct nlmsghdr *nlh, int hdrlen, + struct nlattr **req_nlas) +{ + struct nlattr *nla; + int remaining; + + nlmsg_for_each_attr(nla, nlh, hdrlen, remaining) { + int type = nla_type(nla); + + if (type < __INET_DIAG_REQ_MAX) + req_nlas[type] = nla; + } +} + +static int inet_diag_get_protocol(const struct inet_diag_req_v2 *req, + const struct inet_diag_dump_data *data) +{ + if (data->req_nlas[INET_DIAG_REQ_PROTOCOL]) + return nla_get_u32(data->req_nlas[INET_DIAG_REQ_PROTOCOL]); + return req->sdiag_protocol; +} + #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, @@ -198,7 +225,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, void *info = NULL; cb_data = cb->data; - handler = inet_diag_table[req->sdiag_protocol]; + handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)]; BUG_ON(!handler); nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -539,20 +566,25 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk); static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, const struct nlmsghdr *nlh, + int hdrlen, const struct inet_diag_req_v2 *req) { const struct inet_diag_handler *handler; - int err; + struct inet_diag_dump_data dump_data; + int err, protocol; - handler = inet_diag_lock_handler(req->sdiag_protocol); + memset(&dump_data, 0, sizeof(dump_data)); + inet_diag_parse_attrs(nlh, hdrlen, dump_data.req_nlas); + protocol = inet_diag_get_protocol(req, &dump_data); + + handler = inet_diag_lock_handler(protocol); if (IS_ERR(handler)) { err = PTR_ERR(handler); } else if (cmd == SOCK_DIAG_BY_FAMILY) { - struct inet_diag_dump_data empty_dump_data = {}; struct netlink_callback cb = { .nlh = nlh, .skb = in_skb, - .data = &empty_dump_data, + .data = &dump_data, }; err = handler->dump_one(&cb, req); } else if (cmd == SOCK_DESTROY && handler->destroy) { @@ -1103,13 +1135,16 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { + struct inet_diag_dump_data *cb_data = cb->data; const struct inet_diag_handler *handler; u32 prev_min_dump_alloc; - int err = 0; + int protocol, err = 0; + + protocol = inet_diag_get_protocol(r, cb_data); again: prev_min_dump_alloc = cb->min_dump_alloc; - handler = inet_diag_lock_handler(r->sdiag_protocol); + handler = inet_diag_lock_handler(protocol); if (!IS_ERR(handler)) handler->dump(skb, cb, r); else @@ -1139,19 +1174,13 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) struct inet_diag_dump_data *cb_data; struct sk_buff *skb = cb->skb; struct nlattr *nla; - int rem, err; + int err; cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); if (!cb_data) return -ENOMEM; - nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen), - nlmsg_attrlen(nlh, hdrlen), rem) { - int type = nla_type(nla); - - if (type < __INET_DIAG_REQ_MAX) - cb_data->req_nlas[type] = nla; - } + inet_diag_parse_attrs(nlh, hdrlen, cb_data->req_nlas); nla = cb_data->inet_diag_nla_bc; if (nla) { @@ -1237,7 +1266,8 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req); + return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, + sizeof(struct inet_diag_req), &req); } static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -1279,7 +1309,8 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) return netlink_dump_start(net->diag_nlsk, skb, h, &c); } - return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); + return inet_diag_cmd_exact(h->nlmsg_type, skb, h, hdrlen, + nlmsg_data(h)); } static -- cgit From 96d890daad05a3e47e914451f07b79275b325c95 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:40 +0200 Subject: mptcp: add msk interations helper mptcp_token_iter_next() allow traversing all the MPTCP sockets inside the token container belonging to the given network namespace with a quite standard iterator semantic. That will be used by the next patch, but keep the API generic, as we plan to use this later for PM's sake. Additionally export mptcp_token_get_sock(), as it also will be used by the diag module. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 2 ++ net/mptcp/token.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 39bfec3f1586..e5baaef5ec89 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -391,6 +391,8 @@ int mptcp_token_new_connect(struct sock *sk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); struct mptcp_sock *mptcp_token_get_sock(u32 token); +struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, + long *s_num); void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 66a4990bd897..7d8106026081 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -238,6 +238,66 @@ found: rcu_read_unlock(); return msk; } +EXPORT_SYMBOL_GPL(mptcp_token_get_sock); + +/** + * mptcp_token_iter_next - iterate over the token container from given pos + * @net: namespace to be iterated + * @s_slot: start slot number + * @s_num: start number inside the given lock + * + * This function returns the first mptcp connection structure found inside the + * token container starting from the specified position, or NULL. + * + * On successful iteration, the iterator is move to the next position and the + * the acquires a reference to the returned socket. + */ +struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, + long *s_num) +{ + struct mptcp_sock *ret = NULL; + struct hlist_nulls_node *pos; + int slot, num; + + for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) { + struct token_bucket *bucket = &token_hash[slot]; + struct sock *sk; + + num = 0; + + if (hlist_nulls_empty(&bucket->msk_chain)) + continue; + + rcu_read_lock(); + sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { + ++num; + if (!net_eq(sock_net(sk), net)) + continue; + + if (num <= *s_num) + continue; + + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + continue; + + if (!net_eq(sock_net(sk), net)) { + sock_put(sk); + continue; + } + + ret = mptcp_sk(sk); + rcu_read_unlock(); + goto out; + } + rcu_read_unlock(); + } + +out: + *s_slot = slot; + *s_num = num; + return ret; +} +EXPORT_SYMBOL_GPL(mptcp_token_iter_next); /** * mptcp_token_destroy_request - remove mptcp connection/token @@ -312,7 +372,6 @@ void __init mptcp_token_init(void) EXPORT_SYMBOL_GPL(mptcp_token_new_request); EXPORT_SYMBOL_GPL(mptcp_token_new_connect); EXPORT_SYMBOL_GPL(mptcp_token_accept); -EXPORT_SYMBOL_GPL(mptcp_token_get_sock); EXPORT_SYMBOL_GPL(mptcp_token_destroy_request); EXPORT_SYMBOL_GPL(mptcp_token_destroy); #endif -- cgit From ac3b45f6095452a9731f8825be1513d326dbfa15 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 9 Jul 2020 15:12:41 +0200 Subject: mptcp: add MPTCP socket diag interface exposes basic inet socket attribute, plus some MPTCP socket fields comprising PM status and MPTCP-level sequence numbers. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/Kconfig | 4 ++ net/mptcp/Makefile | 2 + net/mptcp/mptcp_diag.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 net/mptcp/mptcp_diag.c (limited to 'net') diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index af84fce70bb0..698bc3525160 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -13,6 +13,10 @@ config MPTCP if MPTCP +config INET_MPTCP_DIAG + depends on INET_DIAG + def_tristate INET_DIAG + config MPTCP_IPV6 bool "MPTCP: IPv6 support for Multipath TCP" select IPV6 diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index c53f9b845523..2360cbd27d59 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -4,6 +4,8 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o +obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o + mptcp_crypto_test-objs := crypto_test.o mptcp_token_test-objs := token_test.o obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c new file mode 100644 index 000000000000..5f390a97f556 --- /dev/null +++ b/net/mptcp/mptcp_diag.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* MPTCP socket monitoring support + * + * Copyright (c) 2020 Red Hat + * + * Author: Paolo Abeni + */ + +#include +#include +#include +#include +#include +#include "protocol.h" + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + struct nlattr *bc, bool net_admin) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI, + net_admin); +} + +static int mptcp_diag_dump_one(struct netlink_callback *cb, + const struct inet_diag_req_v2 *req) +{ + struct sk_buff *in_skb = cb->skb; + struct mptcp_sock *msk = NULL; + struct sk_buff *rep; + int err = -ENOENT; + struct net *net; + struct sock *sk; + + net = sock_net(in_skb->sk); + msk = mptcp_token_get_sock(req->id.idiag_cookie[0]); + if (!msk) + goto out_nosk; + + err = -ENOMEM; + sk = (struct sock *)msk; + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct mptcp_info)) + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, + GFP_KERNEL); + if (!rep) + goto out; + + err = inet_sk_diag_fill(sk, inet_csk(sk), rep, cb, req, 0, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(rep); + goto out; + } + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + sock_put(sk); + +out_nosk: + return err; +} + +static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r) +{ + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; + struct mptcp_sock *msk; + struct nlattr *bc; + + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; + + while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) != + NULL) { + struct inet_sock *inet = (struct inet_sock *)msk; + struct sock *sk = (struct sock *)msk; + int ret = 0; + + if (!(r->idiag_states & (1 << sk->sk_state))) + goto next; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + + ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin); +next: + sock_put(sk); + if (ret < 0) { + /* will retry on the same position */ + cb->args[1]--; + break; + } + cond_resched(); + } +} + +static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *_info) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_info *info = _info; + u32 flags = 0; + bool slow; + u8 val; + + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); + if (!info) + return; + + slow = lock_sock_fast(sk); + info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); + info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); + info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); + info->mptcpi_subflows_max = READ_ONCE(msk->pm.subflows_max); + val = READ_ONCE(msk->pm.add_addr_signal_max); + info->mptcpi_add_addr_signal_max = val; + val = READ_ONCE(msk->pm.add_addr_accept_max); + info->mptcpi_add_addr_accepted_max = val; + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) + flags |= MPTCP_INFO_FLAG_FALLBACK; + if (READ_ONCE(msk->can_ack)) + flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; + info->mptcpi_flags = flags; + info->mptcpi_token = READ_ONCE(msk->token); + info->mptcpi_write_seq = READ_ONCE(msk->write_seq); + info->mptcpi_snd_una = atomic64_read(&msk->snd_una); + info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); + unlock_sock_fast(sk, slow); +} + +static const struct inet_diag_handler mptcp_diag_handler = { + .dump = mptcp_diag_dump, + .dump_one = mptcp_diag_dump_one, + .idiag_get_info = mptcp_diag_get_info, + .idiag_type = IPPROTO_MPTCP, + .idiag_info_size = sizeof(struct mptcp_info), +}; + +static int __init mptcp_diag_init(void) +{ + return inet_diag_register(&mptcp_diag_handler); +} + +static void __exit mptcp_diag_exit(void) +{ + inet_diag_unregister(&mptcp_diag_handler); +} + +module_init(mptcp_diag_init); +module_exit(mptcp_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */); -- cgit From 10a429bab4462581bbda3fd7f41d4ec0ddc5e682 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:14 +0300 Subject: devlink: Move set attribute of devlink_port_attrs to devlink_port The struct devlink_port_attrs holds the attributes of devlink_port. The 'set' field is not devlink_port's attribute as opposed to most of the others. Move 'set' to be devlink_port's field called 'attrs_set'. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 6ae36808c152..f28ae63cdb6b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -528,7 +528,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, { struct devlink_port_attrs *attrs = &devlink_port->attrs; - if (!attrs->set) + if (!devlink_port->attrs_set) return 0; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; @@ -7518,7 +7518,7 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, if (WARN_ON(devlink_port->registered)) return -EEXIST; - attrs->set = true; + devlink_port->attrs_set = true; attrs->flavour = flavour; if (switch_id) { attrs->switch_port = true; @@ -7626,7 +7626,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!attrs->set) + if (!devlink_port->attrs_set) return -EOPNOTSUPP; switch (attrs->flavour) { -- cgit From 46737a194945e540e3e2eb1fc870207928a9c2eb Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:15 +0300 Subject: devlink: Move switch_port attribute of devlink_port_attrs to devlink_port The struct devlink_port_attrs holds the attributes of devlink_port. Similarly to the previous patch, 'switch_port' attribute is another exception. Move 'switch_port' to be devlink_port's field. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index f28ae63cdb6b..452b2f8a054e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7521,13 +7521,13 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, devlink_port->attrs_set = true; attrs->flavour = flavour; if (switch_id) { - attrs->switch_port = true; + devlink_port->switch_port = true; if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) switch_id_len = MAX_PHYS_ITEM_ID_LEN; memcpy(attrs->switch_id.id, switch_id, switch_id_len); attrs->switch_id.id_len = switch_id_len; } else { - attrs->switch_port = false; + devlink_port->switch_port = false; } return 0; } @@ -9461,7 +9461,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, * any devlink lock as only permanent values are accessed. */ devlink_port = netdev_to_devlink_port(dev); - if (!devlink_port || !devlink_port->attrs.switch_port) + if (!devlink_port || !devlink_port->switch_port) return -EOPNOTSUPP; memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid)); -- cgit From 71ad8d55f8e5ea101069b552422f392655e2ffb6 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:16 +0300 Subject: devlink: Replace devlink_port_attrs_set parameters with a struct Currently, devlink_port_attrs_set accepts a long list of parameters, that most of them are devlink port's attributes. Use the devlink_port_attrs struct to replace the relevant parameters. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 54 +++++++++++------------------------------------------- net/dsa/dsa2.c | 17 +++++++++++------ 2 files changed, 22 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 452b2f8a054e..266936c38357 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7510,9 +7510,7 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) EXPORT_SYMBOL_GPL(devlink_port_type_clear); static int __devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - const unsigned char *switch_id, - unsigned char switch_id_len) + enum devlink_port_flavour flavour) { struct devlink_port_attrs *attrs = &devlink_port->attrs; @@ -7520,12 +7518,10 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, return -EEXIST; devlink_port->attrs_set = true; attrs->flavour = flavour; - if (switch_id) { + if (attrs->switch_id.id_len) { devlink_port->switch_port = true; - if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) - switch_id_len = MAX_PHYS_ITEM_ID_LEN; - memcpy(attrs->switch_id.id, switch_id, switch_id_len); - attrs->switch_id.id_len = switch_id_len; + if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN)) + attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN; } else { devlink_port->switch_port = false; } @@ -7536,33 +7532,17 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, * devlink_port_attrs_set - Set port attributes * * @devlink_port: devlink port - * @flavour: flavour of the port - * @port_number: number of the port that is facing user, for example - * the front panel port number - * @split: indicates if this is split port - * @split_subport_number: if the port is split, this is the number - * of subport. - * @switch_id: if the port is part of switch, this is buffer with ID, - * otwerwise this is NULL - * @switch_id_len: length of the switch_id buffer + * @attrs: devlink port attrs */ void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number, - const unsigned char *switch_id, - unsigned char switch_id_len) + struct devlink_port_attrs *attrs) { - struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - ret = __devlink_port_attrs_set(devlink_port, flavour, - switch_id, switch_id_len); + devlink_port->attrs = *attrs; + ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) return; - attrs->split = split; - attrs->phys.port_number = port_number; - attrs->phys.split_subport_number = split_subport_number; } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); @@ -7571,20 +7551,14 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set); * * @devlink_port: devlink port * @pf: associated PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ -void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf) +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_PF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) return; @@ -7598,21 +7572,15 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set); * @devlink_port: devlink port * @pf: associated PF for the devlink port instance * @vf: associated VF of a PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf, u16 vf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_VF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) return; attrs->pci_vf.pf = pf; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 076908fdd29b..e055efff390b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -261,10 +261,15 @@ static int dsa_port_setup(struct dsa_port *dp) struct devlink_port *dlp = &dp->devlink_port; bool dsa_port_link_registered = false; bool devlink_port_registered = false; + struct devlink_port_attrs attrs = {}; struct devlink *dl = ds->devlink; bool dsa_port_enabled = false; int err = 0; + attrs.phys.port_number = dp->index; + memcpy(attrs.switch_id.id, id, len); + attrs.switch_id.id_len = len; + if (dp->setup) return 0; @@ -274,8 +279,8 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU, - dp->index, false, 0, id, len); + attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; + devlink_port_attrs_set(dlp, &attrs); err = devlink_port_register(dl, dlp, dp->index); if (err) break; @@ -294,8 +299,8 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_DSA: memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA, - dp->index, false, 0, id, len); + attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; + devlink_port_attrs_set(dlp, &attrs); err = devlink_port_register(dl, dlp, dp->index); if (err) break; @@ -314,8 +319,8 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_USER: memset(dlp, 0, sizeof(*dlp)); - devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL, - dp->index, false, 0, id, len); + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + devlink_port_attrs_set(dlp, &attrs); err = devlink_port_register(dl, dlp, dp->index); if (err) break; -- cgit From a21cf0a8330bba60e44ca6c99e1591042f336ff5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:18 +0300 Subject: devlink: Add a new devlink port lanes attribute and pass to netlink Add a new devlink port attribute that indicates the port's number of lanes. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. The attribute is not passed to user space in case the number of lanes is invalid (0). Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 266936c38357..7f26d1054974 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -530,6 +530,10 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, if (!devlink_port->attrs_set) return 0; + if (attrs->lanes) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes)) + return -EMSGSIZE; + } if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; switch (devlink_port->attrs.flavour) { -- cgit From a0f49b54865273c895be3826d6d59cbc5ad725c2 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:20 +0300 Subject: devlink: Add a new devlink port split ability attribute and pass to netlink Add a new attribute that indicates the split ability of devlink port. Drivers are expected to set it via devlink_port_attrs_set(), before registering the port. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 7f26d1054974..94c797b74378 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -534,6 +534,8 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes)) return -EMSGSIZE; } + if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable)) + return -EMSGSIZE; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; switch (devlink_port->attrs.flavour) { @@ -7547,6 +7549,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) return; + WARN_ON(attrs->splittable && attrs->split); } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); -- cgit From 82901ad16905832b7a79ff4316302bb59ec4b4ba Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 9 Jul 2020 16:18:21 +0300 Subject: devlink: Move input checks from driver to devlink Currently, all the input checks are done in driver. After adding the split capability to devlink port, move the checks to devlink. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 94c797b74378..346d385ba09f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -940,6 +940,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct devlink_port *devlink_port; u32 port_index; u32 count; @@ -947,8 +948,27 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) return -EINVAL; + devlink_port = devlink_port_get_from_info(devlink, info); port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); + + if (IS_ERR(devlink_port)) + return -EINVAL; + + if (!devlink_port->attrs.splittable) { + /* Split ports cannot be split. */ + if (devlink_port->attrs.split) + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further"); + else + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split"); + return -EINVAL; + } + + if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count"); + return -EINVAL; + } + return devlink_port_split(devlink, port_index, count, info->extack); } -- cgit From 51c19bf3d5cfaa66571e4b88ba2a6f6295311101 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 10 Jul 2020 12:09:15 -0400 Subject: Bluetooth: Fix slab-out-of-bounds read in hci_extended_inquiry_result_evt() Check upon `num_rsp` is insufficient. A malformed event packet with a large `num_rsp` number makes hci_extended_inquiry_result_evt() go out of bounds. Fix it. This patch fixes the following syzbot bug: https://syzkaller.appspot.com/bug?id=4bf11aa05c4ca51ce0df86e500fce486552dc8d2 Reported-by: syzbot+d8489a79b781849b9c46@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Peilin Ye Acked-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 03a0759f2fc2..13d8802b8137 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4375,7 +4375,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - if (!num_rsp) + if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1) return; if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) -- cgit From 19186c7b45c134820ea6fde3165a2cf30c1ace47 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Jul 2020 15:18:23 -0500 Subject: Bluetooth: core: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/hci_sock.c | 3 +-- net/bluetooth/l2cap_core.c | 19 +++++++++---------- net/bluetooth/l2cap_sock.c | 4 ++-- net/bluetooth/mgmt.c | 4 ++-- net/bluetooth/smp.c | 2 +- 6 files changed, 17 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 13d8802b8137..927bde511170 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2825,7 +2825,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) case HCI_AUTO_CONN_LINK_LOSS: if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT) break; - /* Fall through */ + fallthrough; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: @@ -4320,7 +4320,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, if (hci_setup_sync(conn, conn->link->handle)) goto unlock; } - /* fall through */ + fallthrough; default: conn->state = BT_CLOSED; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d5627967fc25..fad842750442 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -443,8 +443,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) case HCI_DEV_SETUP: if (hdev->manufacturer == 0xffff) return NULL; - - /* fall through */ + fallthrough; case HCI_DEV_UP: skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 35d2bc569a2d..ade83e224567 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -666,8 +666,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err) l2cap_seq_list_free(&chan->srej_list); l2cap_seq_list_free(&chan->retrans_list); - - /* fall through */ + fallthrough; case L2CAP_MODE_STREAMING: skb_queue_purge(&chan->tx_q); @@ -872,7 +871,8 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) else return HCI_AT_NO_BONDING; } - /* fall through */ + fallthrough; + default: switch (chan->sec_level) { case BT_SECURITY_HIGH: @@ -2983,8 +2983,7 @@ static void l2cap_tx_state_wait_f(struct l2cap_chan *chan, break; case L2CAP_EV_RECV_REQSEQ_AND_FBIT: l2cap_process_reqseq(chan, control->reqseq); - - /* Fall through */ + fallthrough; case L2CAP_EV_RECV_FBIT: if (control && control->final) { @@ -3311,7 +3310,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) case L2CAP_MODE_ERTM: if (l2cap_mode_supported(mode, remote_feat_mask)) return mode; - /* fall through */ + fallthrough; default: return L2CAP_MODE_BASIC; } @@ -3447,7 +3446,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); - /* fall through */ + fallthrough; default: chan->mode = l2cap_select_mode(rfc.mode, chan->conn->feat_mask); break; @@ -4539,7 +4538,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, goto done; break; } - /* fall through */ + fallthrough; default: l2cap_chan_set_err(chan, ECONNRESET); @@ -7719,7 +7718,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->mtu = hcon->hdev->le_mtu; break; } - /* fall through */ + fallthrough; default: conn->mtu = hcon->hdev->acl_mtu; break; @@ -7841,7 +7840,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, case L2CAP_MODE_STREAMING: if (!disable_ertm) break; - /* fall through */ + fallthrough; default: err = -EOPNOTSUPP; goto done; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a995d2c51fa7..738a5345fa21 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -284,7 +284,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) case L2CAP_MODE_STREAMING: if (!disable_ertm) break; - /* fall through */ + fallthrough; default: err = -EOPNOTSUPP; goto done; @@ -760,7 +760,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, case L2CAP_MODE_STREAMING: if (!disable_ertm) break; - /* fall through */ + fallthrough; default: err = -EINVAL; break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d29da80e38fe..686ef4792831 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4525,7 +4525,7 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, *mgmt_status = mgmt_le_support(hdev); if (*mgmt_status) return false; - /* Intentional fall-through */ + fallthrough; case DISCOV_TYPE_BREDR: *mgmt_status = mgmt_bredr_support(hdev); if (*mgmt_status) @@ -5901,7 +5901,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, case MGMT_LTK_P256_DEBUG: authenticated = 0x00; type = SMP_LTK_P256_DEBUG; - /* fall through */ + fallthrough; default: continue; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 6fd9ddb2d85c..c5c812e8130e 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1658,7 +1658,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) memset(smp->tk, 0, sizeof(smp->tk)); BT_DBG("PassKey: %d", value); put_unaligned_le32(value, smp->tk); - /* Fall Through */ + fallthrough; case MGMT_OP_USER_CONFIRM_REPLY: set_bit(SMP_FLAG_TK_VALID, &smp->flags); break; -- cgit From 710a9194610ace9db8ea9ac44677ed69602b1ad9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Jul 2020 15:25:05 -0500 Subject: Bluetooth: RFCOMM: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 2e20af317cea..f2bacb464ccf 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -479,7 +479,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) /* if closing a dlc in a session that hasn't been started, * just close and unlink the dlc */ - /* fall through */ + fallthrough; default: rfcomm_dlc_clear_timer(d); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index df14eebe80da..0afc4bc5ab41 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -218,7 +218,7 @@ static void __rfcomm_sock_close(struct sock *sk) case BT_CONFIG: case BT_CONNECTED: rfcomm_dlc_close(d, 0); - /* fall through */ + fallthrough; default: sock_set_flag(sk, SOCK_ZAPPED); -- cgit From d5baf620e5ba31bfd0205bcda15f79a0fa2021ab Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Wed, 8 Jul 2020 15:36:38 +0200 Subject: Replace HTTP links with HTTPS ones: BLUETOOTH SUBSYSTEM Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Marcel Holtmann --- net/bluetooth/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 1d6d243cdde9..e2497d764e97 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -21,7 +21,7 @@ menuconfig BT It was designed as a replacement for cables and other short-range technologies like IrDA. Bluetooth operates in personal area range that typically extends up to 10 meters. More information about - Bluetooth can be found at . + Bluetooth can be found at . Linux Bluetooth subsystem consist of several layers: Bluetooth Core -- cgit From cc4e3835eff474aa274d6e1d18f69d9d296d3b76 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:46 -0700 Subject: udp_tunnel: add central NIC RX port offload infrastructure Cater to devices which: (a) may want to sleep in the callbacks; (b) only have IPv4 support; (c) need all the programming to happen while the netdev is up. Drivers attach UDP tunnel offload info struct to their netdevs, where they declare how many UDP ports of various tunnel types they support. Core takes care of tracking which ports to offload. Use a fixed-size array since this matches what almost all drivers do, and avoids a complexity and uncertainty around memory allocations in an atomic context. Make sure that tunnel drivers don't try to replay the ports when new NIC netdev is registered. Automatic replays would mess up reference counting, and will be removed completely once all drivers are converted. v4: - use a #define NULL to avoid build issues with CONFIG_INET=n. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ipv4/Makefile | 3 +- net/ipv4/udp_tunnel.c | 224 ------------- net/ipv4/udp_tunnel_core.c | 224 +++++++++++++ net/ipv4/udp_tunnel_nic.c | 821 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/udp_tunnel_stub.c | 7 + 5 files changed, 1054 insertions(+), 225 deletions(-) delete mode 100644 net/ipv4/udp_tunnel.c create mode 100644 net/ipv4/udp_tunnel_core.c create mode 100644 net/ipv4/udp_tunnel_nic.c create mode 100644 net/ipv4/udp_tunnel_stub.c (limited to 'net') diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9e1a186a3671..5b77a46885b9 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,7 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ - metrics.o netlink.o nexthop.o + metrics.o netlink.o nexthop.o udp_tunnel_stub.o obj-$(CONFIG_BPFILTER) += bpfilter/ @@ -29,6 +29,7 @@ gre-y := gre_demux.o obj-$(CONFIG_NET_FOU) += fou.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o +udp_tunnel-y := udp_tunnel_core.o udp_tunnel_nic.o obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c deleted file mode 100644 index 3eecba0874aa..000000000000 --- a/net/ipv4/udp_tunnel.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, - struct socket **sockp) -{ - int err; - struct socket *sock = NULL; - struct sockaddr_in udp_addr; - - err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); - if (err < 0) - goto error; - - if (cfg->bind_ifindex) { - err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); - if (err < 0) - goto error; - } - - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->local_ip; - udp_addr.sin_port = cfg->local_udp_port; - err = kernel_bind(sock, (struct sockaddr *)&udp_addr, - sizeof(udp_addr)); - if (err < 0) - goto error; - - if (cfg->peer_udp_port) { - udp_addr.sin_family = AF_INET; - udp_addr.sin_addr = cfg->peer_ip; - udp_addr.sin_port = cfg->peer_udp_port; - err = kernel_connect(sock, (struct sockaddr *)&udp_addr, - sizeof(udp_addr), 0); - if (err < 0) - goto error; - } - - sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; - - *sockp = sock; - return 0; - -error: - if (sock) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sock_release(sock); - } - *sockp = NULL; - return err; -} -EXPORT_SYMBOL(udp_sock_create4); - -void setup_udp_tunnel_sock(struct net *net, struct socket *sock, - struct udp_tunnel_sock_cfg *cfg) -{ - struct sock *sk = sock->sk; - - /* Disable multicast loopback */ - inet_sk(sk)->mc_loop = 0; - - /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - inet_inc_convert_csum(sk); - - rcu_assign_sk_user_data(sk, cfg->sk_user_data); - - udp_sk(sk)->encap_type = cfg->encap_type; - udp_sk(sk)->encap_rcv = cfg->encap_rcv; - udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; - udp_sk(sk)->encap_destroy = cfg->encap_destroy; - udp_sk(sk)->gro_receive = cfg->gro_receive; - udp_sk(sk)->gro_complete = cfg->gro_complete; - - udp_tunnel_encap_enable(sock); -} -EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); - -void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, - unsigned short type) -{ - struct sock *sk = sock->sk; - struct udp_tunnel_info ti; - - if (!dev->netdev_ops->ndo_udp_tunnel_add || - !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - - ti.type = type; - ti.sa_family = sk->sk_family; - ti.port = inet_sk(sk)->inet_sport; - - dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); -} -EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); - -void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, - unsigned short type) -{ - struct sock *sk = sock->sk; - struct udp_tunnel_info ti; - - if (!dev->netdev_ops->ndo_udp_tunnel_del || - !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - return; - - ti.type = type; - ti.sa_family = sk->sk_family; - ti.port = inet_sk(sk)->inet_sport; - - dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); -} -EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); - -/* Notify netdevs that UDP port started listening */ -void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) -{ - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct udp_tunnel_info ti; - struct net_device *dev; - - ti.type = type; - ti.sa_family = sk->sk_family; - ti.port = inet_sk(sk)->inet_sport; - - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (!dev->netdev_ops->ndo_udp_tunnel_add) - continue; - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - continue; - dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); - -/* Notify netdevs that UDP port is no more listening */ -void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) -{ - struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct udp_tunnel_info ti; - struct net_device *dev; - - ti.type = type; - ti.sa_family = sk->sk_family; - ti.port = inet_sk(sk)->inet_sport; - - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (!dev->netdev_ops->ndo_udp_tunnel_del) - continue; - if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) - continue; - dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); - -void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck) -{ - struct udphdr *uh; - - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - - uh->dest = dst_port; - uh->source = src_port; - uh->len = htons(skb->len); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - - udp_set_csum(nocheck, skb, src, dst, skb->len); - - iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); -} -EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); - -void udp_tunnel_sock_release(struct socket *sock) -{ - rcu_assign_sk_user_data(sock->sk, NULL); - kernel_sock_shutdown(sock, SHUT_RDWR); - sock_release(sock); -} -EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); - -struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, - __be16 flags, __be64 tunnel_id, int md_size) -{ - struct metadata_dst *tun_dst; - struct ip_tunnel_info *info; - - if (family == AF_INET) - tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size); - else - tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size); - if (!tun_dst) - return NULL; - - info = &tun_dst->u.tun_info; - info->key.tp_src = udp_hdr(skb)->source; - info->key.tp_dst = udp_hdr(skb)->dest; - if (udp_hdr(skb)->check) - info->key.tun_flags |= TUNNEL_CSUM; - return tun_dst; -} -EXPORT_SYMBOL_GPL(udp_tun_rx_dst); - -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c new file mode 100644 index 000000000000..3eecba0874aa --- /dev/null +++ b/net/ipv4/udp_tunnel_core.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, + struct socket **sockp) +{ + int err; + struct socket *sock = NULL; + struct sockaddr_in udp_addr; + + err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); + if (err < 0) + goto error; + + if (cfg->bind_ifindex) { + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); + if (err < 0) + goto error; + } + + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->local_ip; + udp_addr.sin_port = cfg->local_udp_port; + err = kernel_bind(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr)); + if (err < 0) + goto error; + + if (cfg->peer_udp_port) { + udp_addr.sin_family = AF_INET; + udp_addr.sin_addr = cfg->peer_ip; + udp_addr.sin_port = cfg->peer_udp_port; + err = kernel_connect(sock, (struct sockaddr *)&udp_addr, + sizeof(udp_addr), 0); + if (err < 0) + goto error; + } + + sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; + + *sockp = sock; + return 0; + +error: + if (sock) { + kernel_sock_shutdown(sock, SHUT_RDWR); + sock_release(sock); + } + *sockp = NULL; + return err; +} +EXPORT_SYMBOL(udp_sock_create4); + +void setup_udp_tunnel_sock(struct net *net, struct socket *sock, + struct udp_tunnel_sock_cfg *cfg) +{ + struct sock *sk = sock->sk; + + /* Disable multicast loopback */ + inet_sk(sk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(sk); + + rcu_assign_sk_user_data(sk, cfg->sk_user_data); + + udp_sk(sk)->encap_type = cfg->encap_type; + udp_sk(sk)->encap_rcv = cfg->encap_rcv; + udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; + udp_sk(sk)->encap_destroy = cfg->encap_destroy; + udp_sk(sk)->gro_receive = cfg->gro_receive; + udp_sk(sk)->gro_complete = cfg->gro_complete; + + udp_tunnel_encap_enable(sock); +} +EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); + +void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type) +{ + struct sock *sk = sock->sk; + struct udp_tunnel_info ti; + + if (!dev->netdev_ops->ndo_udp_tunnel_add || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); +} +EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); + +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type) +{ + struct sock *sk = sock->sk; + struct udp_tunnel_info ti; + + if (!dev->netdev_ops->ndo_udp_tunnel_del || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); +} +EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); + +/* Notify netdevs that UDP port started listening */ +void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct udp_tunnel_info ti; + struct net_device *dev; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (!dev->netdev_ops->ndo_udp_tunnel_add) + continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; + dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); + +/* Notify netdevs that UDP port is no more listening */ +void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct udp_tunnel_info ti; + struct net_device *dev; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (!dev->netdev_ops->ndo_udp_tunnel_del) + continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); + +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) +{ + struct udphdr *uh; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + uh->len = htons(skb->len); + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + udp_set_csum(nocheck, skb, src, dst, skb->len); + + iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); +} +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); + +void udp_tunnel_sock_release(struct socket *sock) +{ + rcu_assign_sk_user_data(sock->sk, NULL); + kernel_sock_shutdown(sock, SHUT_RDWR); + sock_release(sock); +} +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); + +struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, + __be16 flags, __be64 tunnel_id, int md_size) +{ + struct metadata_dst *tun_dst; + struct ip_tunnel_info *info; + + if (family == AF_INET) + tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size); + else + tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size); + if (!tun_dst) + return NULL; + + info = &tun_dst->u.tun_info; + info->key.tp_src = udp_hdr(skb)->source; + info->key.tp_dst = udp_hdr(skb)->dest; + if (udp_hdr(skb)->check) + info->key.tun_flags |= TUNNEL_CSUM; + return tun_dst; +} +EXPORT_SYMBOL_GPL(udp_tun_rx_dst); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c new file mode 100644 index 000000000000..056cfe0b770e --- /dev/null +++ b/net/ipv4/udp_tunnel_nic.c @@ -0,0 +1,821 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. + +#include +#include +#include +#include +#include + +enum udp_tunnel_nic_table_entry_flags { + UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0), + UDP_TUNNEL_NIC_ENTRY_DEL = BIT(1), + UDP_TUNNEL_NIC_ENTRY_OP_FAIL = BIT(2), + UDP_TUNNEL_NIC_ENTRY_FROZEN = BIT(3), +}; + +struct udp_tunnel_nic_table_entry { + __be16 port; + u8 type; + u8 use_cnt; + u8 flags; + u8 hw_priv; +}; + +/** + * struct udp_tunnel_nic - UDP tunnel port offload state + * @work: async work for talking to hardware from process context + * @dev: netdev pointer + * @need_sync: at least one port start changed + * @need_replay: space was freed, we need a replay of all ports + * @work_pending: @work is currently scheduled + * @n_tables: number of tables under @entries + * @missed: bitmap of tables which overflown + * @entries: table of tables of ports currently offloaded + */ +struct udp_tunnel_nic { + struct work_struct work; + + struct net_device *dev; + + u8 need_sync:1; + u8 need_replay:1; + u8 work_pending:1; + + unsigned int n_tables; + unsigned long missed; + struct udp_tunnel_nic_table_entry **entries; +}; + +/* We ensure all work structs are done using driver state, but not the code. + * We need a workqueue we can flush before module gets removed. + */ +static struct workqueue_struct *udp_tunnel_nic_workqueue; + +static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type) +{ + switch (type) { + case UDP_TUNNEL_TYPE_VXLAN: + return "vxlan"; + case UDP_TUNNEL_TYPE_GENEVE: + return "geneve"; + case UDP_TUNNEL_TYPE_VXLAN_GPE: + return "vxlan-gpe"; + default: + return "unknown"; + } +} + +static bool +udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->use_cnt == 0 && !entry->flags; +} + +static bool +udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static void +udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry) +{ + if (!udp_tunnel_nic_entry_is_free(entry)) + entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static void +udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry) +{ + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN; +} + +static bool +udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD | + UDP_TUNNEL_NIC_ENTRY_DEL); +} + +static void +udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn, + struct udp_tunnel_nic_table_entry *entry, + unsigned int flag) +{ + entry->flags |= flag; + utn->need_sync = 1; +} + +static void +udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry, + struct udp_tunnel_info *ti) +{ + memset(ti, 0, sizeof(*ti)); + ti->port = entry->port; + ti->type = entry->type; + ti->hw_priv = entry->hw_priv; +} + +static bool +udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) + return false; + return true; +} + +static bool +udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + if (!utn->missed) + return false; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!test_bit(i, &utn->missed)) + continue; + + for (j = 0; j < table->n_entries; j++) + if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) + return true; + } + + return false; +} + +static void +__udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, + unsigned int idx, struct udp_tunnel_info *ti) +{ + struct udp_tunnel_nic_table_entry *entry; + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + entry = &utn->entries[table][idx]; + + if (entry->use_cnt) + udp_tunnel_nic_ti_from_entry(entry, ti); +} + +static void +__udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, + unsigned int idx, u8 priv) +{ + dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv; +} + +static void +udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry, + int err) +{ + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + + WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && + entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL); + + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && + (!err || (err == -EEXIST && dodgy))) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD; + + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL && + (!err || (err == -ENOENT && dodgy))) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL; + + if (!err) + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + else + entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL; +} + +static void +udp_tunnel_nic_device_sync_one(struct net_device *dev, + struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx) +{ + struct udp_tunnel_nic_table_entry *entry; + struct udp_tunnel_info ti; + int err; + + entry = &utn->entries[table][idx]; + if (!udp_tunnel_nic_entry_is_queued(entry)) + return; + + udp_tunnel_nic_ti_from_entry(entry, &ti); + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD) + err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti); + else + err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx, + &ti); + udp_tunnel_nic_entry_update_done(entry, err); + + if (err) + netdev_warn(dev, + "UDP tunnel port sync failed port %d type %s: %d\n", + be16_to_cpu(entry->port), + udp_tunnel_nic_tunnel_type_name(entry->type), + err); +} + +static void +udp_tunnel_nic_device_sync_by_port(struct net_device *dev, + struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_device_sync_one(dev, utn, i, j); +} + +static void +udp_tunnel_nic_device_sync_by_table(struct net_device *dev, + struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + int err; + + for (i = 0; i < utn->n_tables; i++) { + /* Find something that needs sync in this table */ + for (j = 0; j < info->tables[i].n_entries; j++) + if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j])) + break; + if (j == info->tables[i].n_entries) + continue; + + err = info->sync_table(dev, i); + if (err) + netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n", + i, err); + + for (j = 0; j < info->tables[i].n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + if (udp_tunnel_nic_entry_is_queued(entry)) + udp_tunnel_nic_entry_update_done(entry, err); + } + } +} + +static void +__udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + if (!utn->need_sync) + return; + + if (dev->udp_tunnel_nic_info->sync_table) + udp_tunnel_nic_device_sync_by_table(dev, utn); + else + udp_tunnel_nic_device_sync_by_port(dev, utn); + + utn->need_sync = 0; + /* Can't replay directly here, in case we come from the tunnel driver's + * notification - trying to replay may deadlock inside tunnel driver. + */ + utn->need_replay = udp_tunnel_nic_should_replay(dev, utn); +} + +static void +udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + bool may_sleep; + + if (!utn->need_sync) + return; + + /* Drivers which sleep in the callback need to update from + * the workqueue, if we come from the tunnel driver's notification. + */ + may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; + if (!may_sleep) + __udp_tunnel_nic_device_sync(dev, utn); + if (may_sleep || utn->need_replay) { + queue_work(udp_tunnel_nic_workqueue, &utn->work); + utn->work_pending = 1; + } +} + +static bool +udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table, + struct udp_tunnel_info *ti) +{ + return table->tunnel_types & ti->type; +} + +static bool +udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i; + + /* Special case IPv4-only NICs */ + if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY && + ti->sa_family != AF_INET) + return false; + + for (i = 0; i < utn->n_tables; i++) + if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti)) + return true; + return false; +} + +static int +udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic_table_entry *entry; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + entry = &utn->entries[i][j]; + + if (!udp_tunnel_nic_entry_is_free(entry) && + entry->port == ti->port && + entry->type != ti->type) { + __set_bit(i, &utn->missed); + return true; + } + } + return false; +} + +static void +udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx, int use_cnt_adj) +{ + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; + unsigned int from, to; + + /* If not going from used to unused or vice versa - all done. + * For dodgy entries make sure we try to sync again (queue the entry). + */ + entry->use_cnt += use_cnt_adj; + if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj)) + return; + + /* Cancel the op before it was sent to the device, if possible, + * otherwise we'd need to take special care to issue commands + * in the same order the ports arrived. + */ + if (use_cnt_adj < 0) { + from = UDP_TUNNEL_NIC_ENTRY_ADD; + to = UDP_TUNNEL_NIC_ENTRY_DEL; + } else { + from = UDP_TUNNEL_NIC_ENTRY_DEL; + to = UDP_TUNNEL_NIC_ENTRY_ADD; + } + + if (entry->flags & from) { + entry->flags &= ~from; + if (!dodgy) + return; + } + + udp_tunnel_nic_entry_queue(utn, entry, to); +} + +static bool +udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn, + unsigned int table, unsigned int idx, + struct udp_tunnel_info *ti, int use_cnt_adj) +{ + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; + + if (udp_tunnel_nic_entry_is_free(entry) || + entry->port != ti->port || + entry->type != ti->type) + return false; + + if (udp_tunnel_nic_entry_is_frozen(entry)) + return true; + + udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj); + return true; +} + +/* Try to find existing matching entry and adjust its use count, instead of + * adding a new one. Returns true if entry was found. In case of delete the + * entry may have gotten removed in the process, in which case it will be + * queued for removal. + */ +static bool +udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti, int use_cnt_adj) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!udp_tunnel_nic_table_is_capable(table, ti)) + continue; + + for (j = 0; j < table->n_entries; j++) + if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti, + use_cnt_adj)) + return true; + } + + return false; +} + +static bool +udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + return udp_tunnel_nic_try_existing(dev, utn, ti, +1); +} + +static bool +udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + return udp_tunnel_nic_try_existing(dev, utn, ti, -1); +} + +static bool +udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn, + struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_table_info *table; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) { + table = &dev->udp_tunnel_nic_info->tables[i]; + if (!udp_tunnel_nic_table_is_capable(table, ti)) + continue; + + for (j = 0; j < table->n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + if (!udp_tunnel_nic_entry_is_free(entry)) + continue; + + entry->port = ti->port; + entry->type = ti->type; + entry->use_cnt = 1; + udp_tunnel_nic_entry_queue(utn, entry, + UDP_TUNNEL_NIC_ENTRY_ADD); + return true; + } + + /* The different table may still fit this port in, but there + * are no devices currently which have multiple tables accepting + * the same tunnel type, and false positives are okay. + */ + __set_bit(i, &utn->missed); + } + + return false; +} + +static void +__udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) + return; + + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) + return; + + /* It may happen that a tunnel of one type is removed and different + * tunnel type tries to reuse its port before the device was informed. + * Rely on utn->missed to re-add this port later. + */ + if (udp_tunnel_nic_has_collision(dev, utn, ti)) + return; + + if (!udp_tunnel_nic_add_existing(dev, utn, ti)) + udp_tunnel_nic_add_new(dev, utn, ti); + + udp_tunnel_nic_device_sync(dev, utn); +} + +static void +__udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) +{ + struct udp_tunnel_nic *utn; + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) + return; + + udp_tunnel_nic_del_existing(dev, utn, ti); + + udp_tunnel_nic_device_sync(dev, utn); +} + +static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int i, j; + + ASSERT_RTNL(); + + utn = dev->udp_tunnel_nic; + if (!utn) + return; + + utn->need_sync = false; + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + struct udp_tunnel_nic_table_entry *entry; + + entry = &utn->entries[i][j]; + + entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | + UDP_TUNNEL_NIC_ENTRY_OP_FAIL); + /* We don't release rtnl across ops */ + WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); + if (!entry->use_cnt) + continue; + + udp_tunnel_nic_entry_queue(utn, entry, + UDP_TUNNEL_NIC_ENTRY_ADD); + } + + __udp_tunnel_nic_device_sync(dev, utn); +} + +static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { + .get_port = __udp_tunnel_nic_get_port, + .set_port_priv = __udp_tunnel_nic_set_port_priv, + .add_port = __udp_tunnel_nic_add_port, + .del_port = __udp_tunnel_nic_del_port, + .reset_ntf = __udp_tunnel_nic_reset_ntf, +}; + +static void +udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) { + int adj_cnt = -utn->entries[i][j].use_cnt; + + if (adj_cnt) + udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt); + } + + __udp_tunnel_nic_device_sync(dev, utn); + + for (i = 0; i < utn->n_tables; i++) + memset(utn->entries[i], 0, array_size(info->tables[i].n_entries, + sizeof(**utn->entries))); + WARN_ON(utn->need_sync); + utn->need_replay = 0; +} + +static void +udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + unsigned int i, j; + + /* Freeze all the ports we are already tracking so that the replay + * does not double up the refcount. + */ + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]); + utn->missed = 0; + utn->need_replay = 0; + + udp_tunnel_get_rx_info(dev); + + for (i = 0; i < utn->n_tables; i++) + for (j = 0; j < info->tables[i].n_entries; j++) + udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]); +} + +static void udp_tunnel_nic_device_sync_work(struct work_struct *work) +{ + struct udp_tunnel_nic *utn = + container_of(work, struct udp_tunnel_nic, work); + + rtnl_lock(); + utn->work_pending = 0; + __udp_tunnel_nic_device_sync(utn->dev, utn); + + if (utn->need_replay) + udp_tunnel_nic_replay(utn->dev, utn); + rtnl_unlock(); +} + +static struct udp_tunnel_nic * +udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, + unsigned int n_tables) +{ + struct udp_tunnel_nic *utn; + unsigned int i; + + utn = kzalloc(sizeof(*utn), GFP_KERNEL); + if (!utn) + return NULL; + utn->n_tables = n_tables; + INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); + + utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL); + if (!utn->entries) + goto err_free_utn; + + for (i = 0; i < n_tables; i++) { + utn->entries[i] = kcalloc(info->tables[i].n_entries, + sizeof(*utn->entries[i]), GFP_KERNEL); + if (!utn->entries[i]) + goto err_free_prev_entries; + } + + return utn; + +err_free_prev_entries: + while (i--) + kfree(utn->entries[i]); + kfree(utn->entries); +err_free_utn: + kfree(utn); + return NULL; +} + +static int udp_tunnel_nic_register(struct net_device *dev) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int n_tables, i; + + BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < + UDP_TUNNEL_NIC_MAX_TABLES); + + if (WARN_ON(!info->set_port != !info->unset_port) || + WARN_ON(!info->set_port == !info->sync_table) || + WARN_ON(!info->tables[0].n_entries)) + return -EINVAL; + + n_tables = 1; + for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + continue; + + n_tables++; + if (WARN_ON(!info->tables[i - 1].n_entries)) + return -EINVAL; + } + + utn = udp_tunnel_nic_alloc(info, n_tables); + if (!utn) + return -ENOMEM; + + utn->dev = dev; + dev_hold(dev); + dev->udp_tunnel_nic = utn; + + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + udp_tunnel_get_rx_info(dev); + + return 0; +} + +static void +udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) +{ + unsigned int i; + + /* Flush before we check work, so we don't waste time adding entries + * from the work which we will boot immediately. + */ + udp_tunnel_nic_flush(dev, utn); + + /* Wait for the work to be done using the state, netdev core will + * retry unregister until we give up our reference on this device. + */ + if (utn->work_pending) + return; + + for (i = 0; i < utn->n_tables; i++) + kfree(utn->entries[i]); + kfree(utn->entries); + kfree(utn); + dev->udp_tunnel_nic = NULL; + dev_put(dev); +} + +static int +udp_tunnel_nic_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + const struct udp_tunnel_nic_info *info; + struct udp_tunnel_nic *utn; + + info = dev->udp_tunnel_nic_info; + if (!info) + return NOTIFY_DONE; + + if (event == NETDEV_REGISTER) { + int err; + + err = udp_tunnel_nic_register(dev); + if (err) + netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err); + return notifier_from_errno(err); + } + /* All other events will need the udp_tunnel_nic state */ + utn = dev->udp_tunnel_nic; + if (!utn) + return NOTIFY_DONE; + + if (event == NETDEV_UNREGISTER) { + udp_tunnel_nic_unregister(dev, utn); + return NOTIFY_OK; + } + + /* All other events only matter if NIC has to be programmed open */ + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) + return NOTIFY_DONE; + + if (event == NETDEV_UP) { + WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); + udp_tunnel_get_rx_info(dev); + return NOTIFY_OK; + } + if (event == NETDEV_GOING_DOWN) { + udp_tunnel_nic_flush(dev, utn); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = { + .notifier_call = udp_tunnel_nic_netdevice_event, +}; + +static int __init udp_tunnel_nic_init_module(void) +{ + int err; + + udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); + if (!udp_tunnel_nic_workqueue) + return -ENOMEM; + + rtnl_lock(); + udp_tunnel_nic_ops = &__udp_tunnel_nic_ops; + rtnl_unlock(); + + err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block); + if (err) + goto err_unset_ops; + + return 0; + +err_unset_ops: + rtnl_lock(); + udp_tunnel_nic_ops = NULL; + rtnl_unlock(); + destroy_workqueue(udp_tunnel_nic_workqueue); + return err; +} +late_initcall(udp_tunnel_nic_init_module); + +static void __exit udp_tunnel_nic_cleanup_module(void) +{ + unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block); + + rtnl_lock(); + udp_tunnel_nic_ops = NULL; + rtnl_unlock(); + + destroy_workqueue(udp_tunnel_nic_workqueue); +} +module_exit(udp_tunnel_nic_cleanup_module); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_stub.c b/net/ipv4/udp_tunnel_stub.c new file mode 100644 index 000000000000..c4b2888f5fef --- /dev/null +++ b/net/ipv4/udp_tunnel_stub.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (c) 2020 Facebook Inc. + +#include + +const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; +EXPORT_SYMBOL_GPL(udp_tunnel_nic_ops); -- cgit From c7d759eb7b12f91a25f4d3cd03ff5209046ddfc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Jul 2020 17:42:47 -0700 Subject: ethtool: add tunnel info interface Add an interface to report offloaded UDP ports via ethtool netlink. Now that core takes care of tracking which UDP tunnel ports the NICs are aware of we can quite easily export this information out to user space. The responsibility of writing the netlink dumps is split between ethtool code and udp_tunnel_nic.c - since udp_tunnel module may not always be loaded, yet we should always report the capabilities of the NIC. $ ethtool --show-tunnels eth0 Tunnel information for eth0: UDP port table 0: Size: 4 Types: vxlan No entries UDP port table 1: Size: 4 Types: geneve, vxlan-gpe Entries (1): port 1230, vxlan-gpe v4: - back to v2, build fix is now directly in udp_tunnel.h v3: - don't compile ETHTOOL_MSG_TUNNEL_INFO_GET in if CONFIG_INET not set. v2: - fix string set count, - reorder enums in the uAPI, - fix type of ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES to bitset in docs and comments. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/Makefile | 3 +- net/ethtool/common.c | 9 ++ net/ethtool/common.h | 1 + net/ethtool/netlink.c | 12 +++ net/ethtool/netlink.h | 4 + net/ethtool/strset.c | 5 + net/ethtool/tunnels.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/udp_tunnel_nic.c | 69 ++++++++++++ 8 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/tunnels.c (limited to 'net') diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 0c2b94f20499..7a849ff22dad 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ - channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o + channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ + tunnels.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index c54166713797..ed19573fccd7 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -272,6 +273,14 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); +const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { + [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", + [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", + [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE] = "vxlan-gpe", +}; +static_assert(ARRAY_SIZE(udp_tunnel_type_names) == + __ETHTOOL_UDP_TUNNEL_TYPE_CNT); + /* return false if legacy contained non-0 deprecated fields * maxtxpkt/maxrxpkt. rest of ksettings always updated */ diff --git a/net/ethtool/common.h b/net/ethtool/common.h index b83bef38368c..3d9251c95a8b 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -28,6 +28,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 88fd07f47040..fb9d096faaa4 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -181,6 +181,12 @@ err: return NULL; } +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd) +{ + return genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + ðtool_genl_family, 0, cmd); +} + void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) { return genlmsg_put(skb, 0, ++ethnl_bcast_seq, ðtool_genl_family, 0, @@ -849,6 +855,12 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_act_cable_test_tdr, }, + { + .cmd = ETHTOOL_MSG_TUNNEL_INFO_GET, + .doit = ethnl_tunnel_info_doit, + .start = ethnl_tunnel_info_start, + .dumpit = ethnl_tunnel_info_dumpit, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a96b6e90dc2..e2085005caac 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -19,6 +19,7 @@ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev, struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, u16 hdr_attrtype, struct genl_info *info, void **ehdrp); +void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); @@ -361,5 +362,8 @@ int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info); int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); +int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); +int ethnl_tunnel_info_start(struct netlink_callback *cb); +int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 773634b6b048..82707b662fe4 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -75,6 +75,11 @@ static const struct strset_info info_template[] = { .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, + [ETH_SS_UDP_TUNNEL_TYPES] = { + .per_dev = false, + .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + .strings = udp_tunnel_type_names, + }, }; struct strset_req_info { diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c new file mode 100644 index 000000000000..6b89255f1231 --- /dev/null +++ b/net/ethtool/tunnels.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "bitset.h" +#include "common.h" +#include "netlink.h" + +static const struct nla_policy +ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = { + [ETHTOOL_A_TUNNEL_INFO_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .type = NLA_NESTED }, +}; + +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN == ilog2(UDP_TUNNEL_TYPE_VXLAN)); +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_GENEVE == ilog2(UDP_TUNNEL_TYPE_GENEVE)); +static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE == + ilog2(UDP_TUNNEL_TYPE_VXLAN_GPE)); + +static ssize_t +ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, + struct netlink_ext_ack *extack) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct udp_tunnel_nic_info *info; + unsigned int i; + size_t size; + int ret; + + info = req_base->dev->udp_tunnel_nic_info; + if (!info) { + NL_SET_ERR_MSG(extack, + "device does not report tunnel offload info"); + return -EOPNOTSUPP; + } + + size = nla_total_size(0); /* _INFO_UDP_PORTS */ + + for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + return size; + + size += nla_total_size(0); /* _UDP_TABLE */ + size += nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */ + ret = ethnl_bitset32_size(&info->tables[i].tunnel_types, NULL, + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact); + if (ret < 0) + return ret; + size += ret; + + size += udp_tunnel_nic_dump_size(req_base->dev, i); + } + + return size; +} + +static int +ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, + struct sk_buff *skb) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct udp_tunnel_nic_info *info; + struct nlattr *ports, *table; + unsigned int i; + + info = req_base->dev->udp_tunnel_nic_info; + if (!info) + return -EOPNOTSUPP; + + ports = nla_nest_start(skb, ETHTOOL_A_TUNNEL_INFO_UDP_PORTS); + if (!ports) + return -EMSGSIZE; + + for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { + if (!info->tables[i].n_entries) + break; + + table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE); + if (!table) + goto err_cancel_ports; + + if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, + info->tables[i].n_entries)) + goto err_cancel_table; + + if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + &info->tables[i].tunnel_types, NULL, + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact)) + goto err_cancel_table; + + if (udp_tunnel_nic_dump_write(req_base->dev, i, skb)) + goto err_cancel_table; + + nla_nest_end(skb, table); + } + + nla_nest_end(skb, ports); + + return 0; + +err_cancel_table: + nla_nest_cancel(skb, table); +err_cancel_ports: + nla_nest_cancel(skb, ports); + return -EMSGSIZE; +} + +static int +ethnl_tunnel_info_req_parse(struct ethnl_req_info *req_info, + const struct nlmsghdr *nlhdr, struct net *net, + struct netlink_ext_ack *extack, bool require_dev) +{ + struct nlattr *tb[ETHTOOL_A_TUNNEL_INFO_MAX + 1]; + int ret; + + ret = nlmsg_parse(nlhdr, GENL_HDRLEN, tb, ETHTOOL_A_TUNNEL_INFO_MAX, + ethtool_tunnel_info_policy, extack); + if (ret < 0) + return ret; + + return ethnl_parse_header_dev_get(req_info, + tb[ETHTOOL_A_TUNNEL_INFO_HEADER], + net, extack, require_dev); +} + +int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct sk_buff *rskb; + void *reply_payload; + int reply_len; + int ret; + + ret = ethnl_tunnel_info_req_parse(&req_info, info->nlhdr, + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + + rtnl_lock(); + ret = ethnl_tunnel_info_reply_size(&req_info, info->extack); + if (ret < 0) + goto err_unlock_rtnl; + reply_len = ret + ethnl_reply_header_size(); + + rskb = ethnl_reply_init(reply_len, req_info.dev, + ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_A_TUNNEL_INFO_HEADER, + info, &reply_payload); + if (!rskb) { + ret = -ENOMEM; + goto err_unlock_rtnl; + } + + ret = ethnl_tunnel_info_fill_reply(&req_info, rskb); + if (ret) + goto err_free_msg; + rtnl_unlock(); + dev_put(req_info.dev); + genlmsg_end(rskb, reply_payload); + + return genlmsg_reply(rskb, info); + +err_free_msg: + nlmsg_free(rskb); +err_unlock_rtnl: + rtnl_unlock(); + dev_put(req_info.dev); + return ret; +} + +struct ethnl_tunnel_info_dump_ctx { + struct ethnl_req_info req_info; + int pos_hash; + int pos_idx; +}; + +int ethnl_tunnel_info_start(struct netlink_callback *cb) +{ + struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + memset(ctx, 0, sizeof(*ctx)); + + ret = ethnl_tunnel_info_req_parse(&ctx->req_info, cb->nlh, + sock_net(cb->skb->sk), cb->extack, + false); + if (ctx->req_info.dev) { + dev_put(ctx->req_info.dev); + ctx->req_info.dev = NULL; + } + + return ret; +} + +int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + int s_idx = ctx->pos_idx; + int h, idx = 0; + int ret = 0; + void *ehdr; + + rtnl_lock(); + cb->seq = net->dev_base_seq; + for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + struct hlist_head *head; + struct net_device *dev; + + head = &net->dev_index_head[h]; + idx = 0; + hlist_for_each_entry(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TUNNEL_INFO_GET); + if (!ehdr) { + ret = -EMSGSIZE; + goto out; + } + + ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER); + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + goto out; + } + + ctx->req_info.dev = dev; + ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb); + ctx->req_info.dev = NULL; + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + if (ret == -EOPNOTSUPP) + goto cont; + goto out; + } + genlmsg_end(skb, ehdr); +cont: + idx++; + } + } +out: + rtnl_unlock(); + + ctx->pos_hash = h; + ctx->pos_idx = idx; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + + if (ret == -EMSGSIZE && skb->len) + return skb->len; + return ret; +} diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 056cfe0b770e..f0dbd9905a53 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright (c) 2020 Facebook Inc. +#include #include #include #include @@ -72,6 +73,12 @@ udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) return entry->use_cnt == 0 && !entry->flags; } +static bool +udp_tunnel_nic_entry_is_present(struct udp_tunnel_nic_table_entry *entry) +{ + return entry->use_cnt && !(entry->flags & ~UDP_TUNNEL_NIC_ENTRY_FROZEN); +} + static bool udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) { @@ -564,12 +571,74 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) __udp_tunnel_nic_device_sync(dev, utn); } +static size_t +__udp_tunnel_nic_dump_size(struct net_device *dev, unsigned int table) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + unsigned int j; + size_t size; + + utn = dev->udp_tunnel_nic; + if (!utn) + return 0; + + size = 0; + for (j = 0; j < info->tables[table].n_entries; j++) { + if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) + continue; + + size += nla_total_size(0) + /* _TABLE_ENTRY */ + nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */ + nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */ + } + + return size; +} + +static int +__udp_tunnel_nic_dump_write(struct net_device *dev, unsigned int table, + struct sk_buff *skb) +{ + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; + struct udp_tunnel_nic *utn; + struct nlattr *nest; + unsigned int j; + + utn = dev->udp_tunnel_nic; + if (!utn) + return 0; + + for (j = 0; j < info->tables[table].n_entries; j++) { + if (!udp_tunnel_nic_entry_is_present(&utn->entries[table][j])) + continue; + + nest = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); + + if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + utn->entries[table][j].port) || + nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, + ilog2(utn->entries[table][j].type))) + goto err_cancel; + + nla_nest_end(skb, nest); + } + + return 0; + +err_cancel: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { .get_port = __udp_tunnel_nic_get_port, .set_port_priv = __udp_tunnel_nic_set_port_priv, .add_port = __udp_tunnel_nic_add_port, .del_port = __udp_tunnel_nic_del_port, .reset_ntf = __udp_tunnel_nic_reset_ntf, + .dump_size = __udp_tunnel_nic_dump_size, + .dump_write = __udp_tunnel_nic_dump_write, }; static void -- cgit From c57544b3dec4480c1b455a43f18be5ec91ab3136 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:07 +0300 Subject: devlink: Refactor devlink health reporter constructor Prepare a common routine in devlink_health_reporter_create() for usage in similar functions for devlink port health reporters. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 346d385ba09f..a203d35fb56f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5321,6 +5321,31 @@ devlink_health_reporter_find_by_name(struct devlink *devlink, return NULL; } +static struct devlink_health_reporter * +__devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + if (WARN_ON(graceful_period && !ops->recover)) + return ERR_PTR(-EINVAL); + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) + return ERR_PTR(-ENOMEM); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = !!ops->recover; + reporter->auto_dump = !!ops->dump; + mutex_init(&reporter->dump_lock); + refcount_set(&reporter->refcount, 1); + return reporter; +} + /** * devlink_health_reporter_create - create devlink health reporter * @@ -5342,25 +5367,11 @@ devlink_health_reporter_create(struct devlink *devlink, goto unlock; } - if (WARN_ON(graceful_period && !ops->recover)) { - reporter = ERR_PTR(-EINVAL); + reporter = __devlink_health_reporter_create(devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) goto unlock; - } - - reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); - if (!reporter) { - reporter = ERR_PTR(-ENOMEM); - goto unlock; - } - reporter->priv = priv; - reporter->ops = ops; - reporter->devlink = devlink; - reporter->graceful_period = graceful_period; - reporter->auto_recover = !!ops->recover; - reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); - refcount_set(&reporter->refcount, 1); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: mutex_unlock(&devlink->reporters_lock); -- cgit From 3c5584bf0a0493e8d232ade65f4b9c5e995f3a0c Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:08 +0300 Subject: devlink: Rework devlink health reporter destructor Devlink keeps its own reference to every reporter in a list and inits refcount to 1 upon reporter's creation. Existing destructor waits to free the memory indefinitely using msleep() until all references except devlink's own are put. Rework this mechanism by moving memory free routine to a separate function, which is called when the last reporter reference is put. Besides, it allows to call __devlink_health_reporter_destroy() while locked on a reporters list mutex in symmetry to __devlink_health_reporter_create(), which is required in follow-up patch. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index a203d35fb56f..b85f2113398d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5379,6 +5379,29 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_health_reporter_create); +static void +devlink_health_reporter_free(struct devlink_health_reporter *reporter) +{ + mutex_destroy(&reporter->dump_lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); + kfree(reporter); +} + +static void +devlink_health_reporter_put(struct devlink_health_reporter *reporter) +{ + if (refcount_dec_and_test(&reporter->refcount)) + devlink_health_reporter_free(reporter); +} + +static void +__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + list_del(&reporter->list); + devlink_health_reporter_put(reporter); +} + /** * devlink_health_reporter_destroy - destroy devlink health reporter * @@ -5388,14 +5411,8 @@ void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { mutex_lock(&reporter->devlink->reporters_lock); - list_del(&reporter->list); + __devlink_health_reporter_destroy(reporter); mutex_unlock(&reporter->devlink->reporters_lock); - while (refcount_read(&reporter->refcount) > 1) - msleep(100); - mutex_destroy(&reporter->dump_lock); - if (reporter->dump_fmsg) - devlink_fmsg_free(reporter->dump_fmsg); - kfree(reporter); } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); @@ -5665,12 +5682,6 @@ unlock: return NULL; } -static void -devlink_health_reporter_put(struct devlink_health_reporter *reporter) -{ - refcount_dec(&reporter->refcount); -} - void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state) -- cgit From bd8210055c36a453358d75017dce7522d950dd38 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:09 +0300 Subject: devlink: Create generic devlink health reporter search function Add a generic __devlink_health_reporter_find_by_name() that can be used with arbitrary devlink health reporter list. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index b85f2113398d..4e995de65b36 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5309,18 +5309,28 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter) EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); static struct devlink_health_reporter * -devlink_health_reporter_find_by_name(struct devlink *devlink, - const char *reporter_name) +__devlink_health_reporter_find_by_name(struct list_head *reporter_list, + struct mutex *list_lock, + const char *reporter_name) { struct devlink_health_reporter *reporter; - lockdep_assert_held(&devlink->reporters_lock); - list_for_each_entry(reporter, &devlink->reporter_list, list) + lockdep_assert_held(list_lock); + list_for_each_entry(reporter, reporter_list, list) if (!strcmp(reporter->ops->name, reporter_name)) return reporter; return NULL; } +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink->reporter_list, + &devlink->reporters_lock, + reporter_name); +} + static struct devlink_health_reporter * __devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, -- cgit From f4f541660121aeb91a1b462ab3f3c5a86ab7c3dd Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:10 +0300 Subject: devlink: Implement devlink health reporters on per-port basis Add devlink-health reporter support on per-port basis. The main difference existing devlink-health is that port reporters are stored in per-devlink_port lists. Upon creation of such health reporter the reference to a port it belongs to is stored in reporter struct. Fill the port index attribute in devlink-health response to allow devlink userspace utility to distinguish between device and port reporters. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 94 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 4e995de65b36..b4a231ca7135 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -386,19 +386,21 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) -#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) -#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(2) +#define DEVLINK_NL_FLAG_NEED_SB BIT(3) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port; struct devlink *devlink; int err; @@ -413,14 +415,17 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { info->user_ptr[0] = devlink; } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { - struct devlink_port *devlink_port; - devlink_port = devlink_port_get_from_info(devlink, info); if (IS_ERR(devlink_port)) { err = PTR_ERR(devlink_port); goto unlock; } info->user_ptr[0] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { + info->user_ptr[0] = devlink; + devlink_port = devlink_port_get_from_info(devlink, info); + if (!IS_ERR(devlink_port)) + info->user_ptr[1] = devlink_port; } if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { struct devlink_sb *devlink_sb; @@ -5287,6 +5292,7 @@ struct devlink_health_reporter { void *priv; const struct devlink_health_reporter_ops *ops; struct devlink *devlink; + struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; @@ -5331,6 +5337,15 @@ devlink_health_reporter_find_by_name(struct devlink *devlink, reporter_name); } +static struct devlink_health_reporter * +devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list, + &devlink_port->reporters_lock, + reporter_name); +} + static struct devlink_health_reporter * __devlink_health_reporter_create(struct devlink *devlink, const struct devlink_health_reporter_ops *ops, @@ -5443,6 +5458,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; + if (reporter->devlink_port) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index)) + goto genlmsg_cancel; + } reporter_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_HEALTH_REPORTER); if (!reporter_attr) @@ -5650,17 +5669,28 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) { struct devlink_health_reporter *reporter; + struct devlink_port *devlink_port; char *reporter_name; if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) return NULL; reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - mutex_lock(&devlink->reporters_lock); - reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); - if (reporter) - refcount_inc(&reporter->refcount); - mutex_unlock(&devlink->reporters_lock); + devlink_port = devlink_port_get_from_attrs(devlink, attrs); + if (IS_ERR(devlink_port)) { + mutex_lock(&devlink->reporters_lock); + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink->reporters_lock); + } else { + mutex_lock(&devlink_port->reporters_lock); + reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink_port->reporters_lock); + } + return reporter; } @@ -5748,6 +5778,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct devlink_health_reporter *reporter; + struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; int idx = 0; @@ -5778,6 +5809,31 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, } mutex_unlock(&devlink->reporters_lock); } + + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(port, &devlink->port_list, list) { + mutex_lock(&port->reporters_lock); + list_for_each_entry(reporter, &port->reporter_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&port->reporters_lock); + goto out; + } + idx++; + } + mutex_unlock(&port->reporters_lock); + } + } out: mutex_unlock(&devlink_mutex); @@ -7157,7 +7213,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, /* can be retrieved by unprivileged users */ }, @@ -7166,7 +7222,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7174,7 +7230,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7182,7 +7238,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7191,7 +7247,7 @@ static const struct genl_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7199,7 +7255,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7459,6 +7515,8 @@ int devlink_port_register(struct devlink *devlink, list_add_tail(&devlink_port->list, &devlink->port_list); INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); + INIT_LIST_HEAD(&devlink_port->reporter_list); + mutex_init(&devlink_port->reporters_lock); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); @@ -7475,6 +7533,8 @@ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; + WARN_ON(!list_empty(&devlink_port->reporter_list)); + mutex_destroy(&devlink_port->reporters_lock); devlink_port_type_warn_cancel(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); mutex_lock(&devlink->lock); -- cgit From 15c724b997a8fe1a677cf11797fb29c0bdecc63f Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Fri, 10 Jul 2020 15:25:11 +0300 Subject: devlink: Add devlink health port reporters API In order to use new devlink port health reporters infrastructure, add corresponding constructor and destructor functions. Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index b4a231ca7135..20a83aace642 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5371,6 +5371,42 @@ __devlink_health_reporter_create(struct devlink *devlink, return reporter; } +/** + * devlink_port_health_reporter_create - create devlink health reporter for + * specified port instance + * + * @port: devlink_port which should contain the new reporter + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&port->reporters_lock); + if (__devlink_health_reporter_find_by_name(&port->reporter_list, + &port->reporters_lock, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + reporter = __devlink_health_reporter_create(port->devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) + goto unlock; + + reporter->devlink_port = port; + list_add_tail(&reporter->list, &port->reporter_list); +unlock: + mutex_unlock(&port->reporters_lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); + /** * devlink_health_reporter_create - create devlink health reporter * @@ -5441,6 +5477,20 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); +/** + * devlink_port_health_reporter_destroy - destroy devlink port health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + mutex_lock(&reporter->devlink_port->reporters_lock); + __devlink_health_reporter_destroy(reporter); + mutex_unlock(&reporter->devlink_port->reporters_lock); +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); + static int devlink_nl_health_reporter_fill(struct sk_buff *msg, struct devlink *devlink, -- cgit From a594920f8747fa032c784c3660d6cd5a8ab291f8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 11 Jul 2020 00:57:59 +0900 Subject: inet: Remove an unnecessary argument of syn_ack_recalc(). Commit 0c3d79bce48034018e840468ac5a642894a521a3 ("tcp: reduce SYN-ACK retrans for TCP_DEFER_ACCEPT") introduces syn_ack_recalc() which decides if a minisock is held and a SYN+ACK is retransmitted or not. If rskq_defer_accept is not zero in syn_ack_recalc(), max_retries always has the same value because max_retries is overwritten by rskq_defer_accept in reqsk_timer_handler(). This commit adds three changes: - remove redundant non-zero check for rskq_defer_accept in reqsk_timer_handler(). - remove max_retries from the arguments of syn_ack_recalc() and use rskq_defer_accept instead. - rename thresh to max_syn_ack_retries for readability. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Benjamin Herrenschmidt CC: Julian Anastasov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index afaf582a5aa9..22b0e7336360 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -648,20 +648,19 @@ no_route: EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); /* Decide when to expire the request and when to resend SYN-ACK */ -static inline void syn_ack_recalc(struct request_sock *req, const int thresh, - const int max_retries, - const u8 rskq_defer_accept, - int *expire, int *resend) +static void syn_ack_recalc(struct request_sock *req, + const int max_syn_ack_retries, + const u8 rskq_defer_accept, + int *expire, int *resend) { if (!rskq_defer_accept) { - *expire = req->num_timeout >= thresh; + *expire = req->num_timeout >= max_syn_ack_retries; *resend = 1; return; } - *expire = req->num_timeout >= thresh && - (!inet_rsk(req)->acked || req->num_timeout >= max_retries); - /* - * Do not resend while waiting for data after ACK, + *expire = req->num_timeout >= max_syn_ack_retries && + (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept); + /* Do not resend while waiting for data after ACK, * start to resend on end of deferring period to give * last chance for data or ACK to create established socket. */ @@ -720,15 +719,12 @@ static void reqsk_timer_handler(struct timer_list *t) struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; - int qlen, expire = 0, resend = 0; - int max_retries, thresh; - u8 defer_accept; + int max_syn_ack_retries, qlen, expire = 0, resend = 0; if (inet_sk_state_load(sk_listener) != TCP_LISTEN) goto drop; - max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; - thresh = max_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means @@ -750,17 +746,14 @@ static void reqsk_timer_handler(struct timer_list *t) if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) { int young = reqsk_queue_len_young(queue) << 1; - while (thresh > 2) { + while (max_syn_ack_retries > 2) { if (qlen < young) break; - thresh--; + max_syn_ack_retries--; young <<= 1; } } - defer_accept = READ_ONCE(queue->rskq_defer_accept); - if (defer_accept) - max_retries = defer_accept; - syn_ack_recalc(req, thresh, max_retries, defer_accept, + syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept), &expire, &resend); req->rsk_ops->syn_ack_timeout(req); if (!expire && -- cgit From 94339443686b36d3223bc032b7947267474e2679 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 11 Jul 2020 18:05:04 +0300 Subject: net: bridge: notify on vlan tunnel changes done via the old api If someone uses the old vlan API to configure tunnel mappings we'll only generate the old-style full port notification. That would be a problem if we are monitoring the new vlan notifications for changes. The patch resolves the issue by adding vlan notifications to the old tunnel netlink code. As usual we try to compress the notifications for as many vlans in a range as possible, thus a vlan tunnel change is considered able to enter the "current" vlan notification range if: 1. vlan exists 2. it has actually changed (curr_change == true) 3. it passes all standard vlan notification range checks done by br_vlan_can_enter_range() such as option equality, id continuity etc Note that vlan tunnel changes (add/del) are considered a part of vlan options so only RTM_NEWVLAN notification is generated with the relevant information inside. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink_tunnel.c | 49 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 162998e2f039..8914290c75d4 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -250,6 +250,36 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, return 0; } +/* send a notification if v_curr can't enter the range and start a new one */ +static void __vlan_tunnel_handle_range(const struct net_bridge_port *p, + struct net_bridge_vlan **v_start, + struct net_bridge_vlan **v_end, + int v_curr, bool curr_change) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + vg = nbp_vlan_group(p); + if (!vg) + return; + + v = br_vlan_find(vg, v_curr); + + if (!*v_start) + goto out_init; + + if (v && curr_change && br_vlan_can_enter_range(v, *v_end)) { + *v_end = v; + return; + } + + br_vlan_notify(p->br, p, (*v_start)->vid, (*v_end)->vid, RTM_NEWVLAN); +out_init: + /* we start a range only if there are any changes to notify about */ + *v_start = curr_change ? v : NULL; + *v_end = *v_start; +} + int br_process_vlan_tunnel_info(const struct net_bridge *br, const struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, @@ -263,6 +293,7 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br, return -EINVAL; memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info)); } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) { + struct net_bridge_vlan *v_start = NULL, *v_end = NULL; int t, v; if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)) @@ -272,11 +303,24 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br, return -EINVAL; t = tinfo_last->tunid; for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { - err = br_vlan_tunnel_info(p, cmd, v, t, changed); + bool curr_change = false; + + err = br_vlan_tunnel_info(p, cmd, v, t, &curr_change); if (err) - return err; + break; t++; + + if (curr_change) + *changed = curr_change; + __vlan_tunnel_handle_range(p, &v_start, &v_end, v, + curr_change); } + if (v_start && v_end) + br_vlan_notify(br, p, v_start->vid, v_end->vid, + RTM_NEWVLAN); + if (err) + return err; + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); } else { @@ -286,6 +330,7 @@ int br_process_vlan_tunnel_info(const struct net_bridge *br, tinfo_curr->tunid, changed); if (err) return err; + br_vlan_notify(br, p, tinfo_curr->vid, 0, RTM_NEWVLAN); memset(tinfo_last, 0, sizeof(struct vtunnel_info)); memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); } -- cgit From 2749c69734298905aedb0629f2bc66346f0031f9 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 13:16:51 +0300 Subject: xfrm interface: avoid xi lookup in xfrmi_decode_session() The xfrmi context exists in the netdevice priv context. Avoid looking for it in a separate list. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index a79eb49a4e0d..36a765eac034 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -47,6 +47,7 @@ static int xfrmi_dev_init(struct net_device *dev); static void xfrmi_dev_setup(struct net_device *dev); static struct rtnl_link_ops xfrmi_link_ops __read_mostly; static unsigned int xfrmi_net_id __read_mostly; +static const struct net_device_ops xfrmi_netdev_ops; struct xfrmi_net { /* lists for storing interfaces in use */ @@ -73,8 +74,7 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, unsigned short family) { - struct xfrmi_net *xfrmn; - struct xfrm_if *xi; + struct net_device *dev; int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) @@ -88,18 +88,21 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, ifindex = inet_sdif(skb); break; } - if (!ifindex) - ifindex = skb->dev->ifindex; - xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); + if (ifindex) { + struct net *net = xs_net(xfrm_input_state(skb)); - for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { - if (ifindex == xi->dev->ifindex && - (xi->dev->flags & IFF_UP)) - return xi; + dev = dev_get_by_index_rcu(net, ifindex); + } else { + dev = skb->dev; } - return NULL; + if (!dev || !(dev->flags & IFF_UP)) + return NULL; + if (dev->netdev_ops != &xfrmi_netdev_ops) + return NULL; + + return netdev_priv(dev); } static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) -- cgit From e98e44562ba2ee994f4fd1e32be7e327edd263ca Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 9 Jul 2020 13:16:52 +0300 Subject: xfrm interface: store xfrmi contexts in a hash by if_id xfrmi_lookup() is called on every packet. Using a single list for looking up if_id becomes a bottleneck when having many xfrm interfaces. Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 36a765eac034..96496fdfe3ce 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -49,20 +49,28 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly; static unsigned int xfrmi_net_id __read_mostly; static const struct net_device_ops xfrmi_netdev_ops; +#define XFRMI_HASH_BITS 8 +#define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS) + struct xfrmi_net { /* lists for storing interfaces in use */ - struct xfrm_if __rcu *xfrmi[1]; + struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; }; #define for_each_xfrmi_rcu(start, xi) \ for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next)) +static u32 xfrmi_hash(u32 if_id) +{ + return hash_32(if_id, XFRMI_HASH_BITS); +} + static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if *xi; - for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { + for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) { if (x->if_id == xi->p.if_id && (xi->dev->flags & IFF_UP)) return xi; @@ -107,7 +115,7 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) { - struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0]; + struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; rcu_assign_pointer(xi->next , rtnl_dereference(*xip)); rcu_assign_pointer(*xip, xi); @@ -118,7 +126,7 @@ static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) struct xfrm_if __rcu **xip; struct xfrm_if *iter; - for (xip = &xfrmn->xfrmi[0]; + for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; (iter = rtnl_dereference(*xip)) != NULL; xip = &iter->next) { if (xi == iter) { @@ -162,7 +170,7 @@ static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p) struct xfrm_if *xi; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); - for (xip = &xfrmn->xfrmi[0]; + for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) if (xi->p.if_id == p->if_id) @@ -761,11 +769,14 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; struct xfrm_if *xi; + int i; - for (xip = &xfrmn->xfrmi[0]; - (xi = rtnl_dereference(*xip)) != NULL; - xip = &xi->next) - unregister_netdevice_queue(xi->dev, &list); + for (i = 0; i < XFRMI_HASH_SIZE; i++) { + for (xip = &xfrmn->xfrmi[i]; + (xi = rtnl_dereference(*xip)) != NULL; + xip = &xi->next) + unregister_netdevice_queue(xi->dev, &list); + } } unregister_netdevice_many(&list); rtnl_unlock(); -- cgit From 75bbd2ea50ba1c5d9da878a17e92eac02fe0fd3a Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 10 Jul 2020 17:39:18 -0400 Subject: Bluetooth: Prevent out-of-bounds read in hci_inquiry_result_evt() Check `num_rsp` before using it as for-loop counter. Cc: stable@vger.kernel.org Signed-off-by: Peilin Ye Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 927bde511170..b97d0247983c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2517,7 +2517,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s num_rsp %d", hdev->name, num_rsp); - if (!num_rsp) + if (!num_rsp || skb->len < num_rsp * sizeof(*info) + 1) return; if (hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) -- cgit From 629b49c848ee71244203934347bd7730b0ddee8d Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 10 Jul 2020 17:45:26 -0400 Subject: Bluetooth: Prevent out-of-bounds read in hci_inquiry_result_with_rssi_evt() Check `num_rsp` before using it as for-loop counter. Add `unlock` label. Cc: stable@vger.kernel.org Signed-off-by: Peilin Ye Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b97d0247983c..61f8c4d12028 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4159,6 +4159,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct inquiry_info_with_rssi_and_pscan_mode *info; info = (void *) (skb->data + 1); + if (skb->len < num_rsp * sizeof(*info) + 1) + goto unlock; + for (; num_rsp; num_rsp--, info++) { u32 flags; @@ -4180,6 +4183,9 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, } else { struct inquiry_info_with_rssi *info = (void *) (skb->data + 1); + if (skb->len < num_rsp * sizeof(*info) + 1) + goto unlock; + for (; num_rsp; num_rsp--, info++) { u32 flags; @@ -4200,6 +4206,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, } } +unlock: hci_dev_unlock(hdev); } -- cgit From c9a0f3b85e09dd16665b639cb884490410619434 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 11 Jul 2020 23:53:24 +0200 Subject: bpf: Resolve BTF IDs in vmlinux image Using BTF_ID_LIST macro to define lists for several helpers using BTF arguments. And running resolve_btfids on vmlinux elf object during linking, so the .BTF_ids section gets the IDs resolved. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Tested-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200711215329.41165-5-jolsa@kernel.org --- net/core/filter.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index ddcc0d6209e1..4e572441e64a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -75,6 +75,7 @@ #include #include #include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -3779,7 +3780,9 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static int bpf_skb_output_btf_ids[5]; +BTF_ID_LIST(bpf_skb_output_btf_ids) +BTF_ID(struct, sk_buff) + const struct bpf_func_proto bpf_skb_output_proto = { .func = bpf_skb_event_output, .gpl_only = true, @@ -4173,7 +4176,9 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; -static int bpf_xdp_output_btf_ids[5]; +BTF_ID_LIST(bpf_xdp_output_btf_ids) +BTF_ID(struct, xdp_buff) + const struct bpf_func_proto bpf_xdp_output_proto = { .func = bpf_xdp_event_output, .gpl_only = true, -- cgit From 528ae84a34ffd40da5d3fbff740d28d6dc2c8f8a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 13 Jul 2020 10:55:46 +0300 Subject: net: bridge: fix undefined br_vlan_can_enter_range in tunnel code If bridge vlan filtering is not defined we won't have br_vlan_can_enter_range and thus will get a compile error as was reported by Stephen and the build bot. So let's define a stub for when vlan filtering is not used. Fixes: 94339443686b ("net: bridge: notify on vlan tunnel changes done via the old api") Reported-by: Stephen Rothwell Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_private.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a6f348bea29a..baa1500f384f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1200,6 +1200,12 @@ static inline void br_vlan_notify(const struct net_bridge *br, int cmd) { } + +static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr, + const struct net_bridge_vlan *range_end) +{ + return true; +} #endif /* br_vlan_options.c */ -- cgit From 266f312845857994f761699e72b1023f576e5f13 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 09:51:08 +0200 Subject: dccp: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- net/dccp/Kconfig | 2 +- net/dccp/ccids/Kconfig | 4 ++-- net/dccp/ccids/ccid3.c | 2 +- net/dccp/ccids/ccid3.h | 2 +- net/dccp/ccids/lib/packet_history.c | 2 +- net/dccp/ccids/lib/packet_history.h | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 51ac2631fb48..0c7d2f66ba27 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -5,7 +5,7 @@ menuconfig IP_DCCP help Datagram Congestion Control Protocol (RFC 4340) - From http://www.ietf.org/rfc/rfc4340.txt: + From https://www.ietf.org/rfc/rfc4340.txt: The Datagram Congestion Control Protocol (DCCP) is a transport protocol that implements bidirectional, unicast connections of diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 4d7771f36eff..a3eeb84d16f9 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -26,13 +26,13 @@ config IP_DCCP_CCID3 relatively smooth sending rate is of importance. CCID-3 is further described in RFC 4342, - http://www.ietf.org/rfc/rfc4342.txt + https://www.ietf.org/rfc/rfc4342.txt The TFRC congestion control algorithms were initially described in RFC 5348. This text was extracted from RFC 4340 (sec. 10.2), - http://www.ietf.org/rfc/rfc4340.txt + https://www.ietf.org/rfc/rfc4340.txt If in doubt, say N. diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 9ef9bee9610f..aef72f6a2829 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -7,7 +7,7 @@ * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ + * research group. For further information please see https://www.wand.net.nz/ * * This code also uses code from Lulea University, rereleased as GPL by its * authors: diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 081c195e7f7d..02e0fc9f6334 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -6,7 +6,7 @@ * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ + * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 2d41bb036271..0bef57b908fb 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -6,7 +6,7 @@ * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ + * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index a157d874840b..159cc9326eab 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -6,7 +6,7 @@ * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * This code has been developed by the University of Waikato WAND - * research group. For further information please see http://www.wand.net.nz/ + * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its -- cgit From 2be53e0e4690a764b6af18536eb78b811f40eacc Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Mon, 13 Jul 2020 11:02:51 +0200 Subject: AX.25 Kconfig: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: David S. Miller --- net/ax25/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig index 97d686d115c0..d3a9843a043d 100644 --- a/net/ax25/Kconfig +++ b/net/ax25/Kconfig @@ -8,7 +8,7 @@ menuconfig HAMRADIO bool "Amateur Radio support" help If you want to connect your Linux box to an amateur radio, answer Y - here. You want to read + here. You want to read and more specifically about AX.25 on Linux . @@ -39,11 +39,11 @@ config AX25 Information about where to get supporting software for Linux amateur radio as well as information about how to configure an AX.25 port is contained in the AX25-HOWTO, available from - . You might also want to + . You might also want to check out the file in the kernel source. More information about digital amateur radio in general is on the WWW at - . + . To compile this driver as a module, choose M here: the module will be called ax25. @@ -90,7 +90,7 @@ config NETROM . You also might want to check out the file . More information about digital amateur radio in general is on the WWW at - . + . To compile this driver as a module, choose M here: the module will be called netrom. @@ -109,7 +109,7 @@ config ROSE . You also might want to check out the file . More information about digital amateur radio in general is on the WWW at - . + . To compile this driver as a module, choose M here: the module will be called rose. -- cgit From 8aa5a33578e9685d06020bd10d1637557423e945 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:33 +0000 Subject: xsk: Add new statistics It can be useful for the user to know the reason behind a dropped packet. Introduce new counters which track drops on the receive path caused by: 1. rx ring being full 2. fill ring being empty Also, on the tx path introduce a counter which tracks the number of times we attempt pull from the tx ring when it is empty. Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-2-ciara.loftus@intel.com --- net/xdp/xsk.c | 36 +++++++++++++++++++++++++++++++----- net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 6 ++++++ 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3700266229f6..26e3bba8c204 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -123,7 +123,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) addr = xp_get_handle(xskb); err = xskq_prod_reserve_desc(xs->rx, addr, len); if (err) { - xs->rx_dropped++; + xs->rx_queue_full++; return err; } @@ -274,8 +274,10 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) { - if (!xskq_cons_peek_desc(xs->tx, desc, umem)) + if (!xskq_cons_peek_desc(xs->tx, desc, umem)) { + xs->tx->queue_empty_descs++; continue; + } /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed @@ -387,6 +389,8 @@ static int xsk_generic_xmit(struct sock *sk) sent_frame = true; } + xs->tx->queue_empty_descs++; + out: if (sent_frame) sk->sk_write_space(sk); @@ -812,6 +816,12 @@ static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) ring->desc = offsetof(struct xdp_umem_ring, desc); } +struct xdp_statistics_v1 { + __u64 rx_dropped; + __u64 rx_invalid_descs; + __u64 tx_invalid_descs; +}; + static int xsk_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -831,19 +841,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, case XDP_STATISTICS: { struct xdp_statistics stats; + bool extra_stats = true; + size_t stats_size; - if (len < sizeof(stats)) + if (len < sizeof(struct xdp_statistics_v1)) { return -EINVAL; + } else if (len < sizeof(stats)) { + extra_stats = false; + stats_size = sizeof(struct xdp_statistics_v1); + } else { + stats_size = sizeof(stats); + } mutex_lock(&xs->mutex); stats.rx_dropped = xs->rx_dropped; + if (extra_stats) { + stats.rx_ring_full = xs->rx_queue_full; + stats.rx_fill_ring_empty_descs = + xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0; + stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx); + } else { + stats.rx_dropped += xs->rx_queue_full; + } stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx); stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx); mutex_unlock(&xs->mutex); - if (copy_to_user(optval, &stats, sizeof(stats))) + if (copy_to_user(optval, &stats, stats_size)) return -EFAULT; - if (put_user(sizeof(stats), optlen)) + if (put_user(stats_size, optlen)) return -EFAULT; return 0; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 540ed75e4482..89cf3551d3e9 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -235,6 +235,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) for (;;) { if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { + pool->fq->queue_empty_descs++; xp_release(xskb); return NULL; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 5b5d24d2dd37..bf42cfd74b89 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -38,6 +38,7 @@ struct xsk_queue { u32 cached_cons; struct xdp_ring *ring; u64 invalid_descs; + u64 queue_empty_descs; }; /* The structure of the shared state of the rings are the same as the @@ -354,6 +355,11 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) return q ? q->invalid_descs : 0; } +static inline u64 xskq_nb_queue_empty_descs(struct xsk_queue *q) +{ + return q ? q->queue_empty_descs : 0; +} + struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); -- cgit From 0d80cb4612aa32dc0faa17fa3ab6f96f33e2b4a7 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Wed, 8 Jul 2020 07:28:35 +0000 Subject: xsk: Add xdp statistics to xsk_diag Add xdp statistics to the information dumped through the xsk_diag interface Signed-off-by: Ciara Loftus Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200708072835.4427-4-ciara.loftus@intel.com --- net/xdp/xsk_diag.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 0163b26aaf63..21e9c2d123ee 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -76,6 +76,19 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) return err; } +static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_diag_stats du = {}; + + du.n_rx_dropped = xs->rx_dropped; + du.n_rx_invalid = xskq_nb_invalid_descs(xs->rx); + du.n_rx_full = xs->rx_queue_full; + du.n_fill_ring_empty = xs->umem ? xskq_nb_queue_empty_descs(xs->umem->fq) : 0; + du.n_tx_invalid = xskq_nb_invalid_descs(xs->tx); + du.n_tx_ring_empty = xskq_nb_queue_empty_descs(xs->tx); + return nla_put(nlskb, XDP_DIAG_STATS, sizeof(du), &du); +} + static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, struct xdp_diag_req *req, struct user_namespace *user_ns, @@ -118,6 +131,10 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->xdiag_show & XDP_SHOW_STATS) && + xsk_diag_put_stats(xs, nlskb)) + goto out_nlmsg_trim; + mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; -- cgit From 15e522a7b110777b20612937c4424b5c448e7431 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:14:57 +0200 Subject: net: 9p: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Eric Van Hensbergen Cc: Latchesar Ionkov Cc: Dominique Martinet Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/9p/client.c | 2 +- net/9p/trans_rdma.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index fc1f3635e5dd..09f1ec589b80 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -811,7 +811,7 @@ reterr: * @uodata: source for zero copy write * @inlen: read buffer size * @olen: write buffer size - * @hdrlen: reader header size, This is the size of response protocol data + * @in_hdrlen: reader header size, This is the size of response protocol data * @fmt: protocol format string (see protocol.c) * * Returns request structure (which client must free using p9_tag_remove) diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index b21c3c209815..2885ff9c76f0 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -94,14 +94,15 @@ struct p9_trans_rdma { struct completion cm_done; }; +struct p9_rdma_req; + /** - * p9_rdma_context - Keeps track of in-process WR + * struct p9_rdma_context - Keeps track of in-process WR * * @busa: Bus address to unmap when the WR completes * @req: Keeps track of requests (send) * @rc: Keepts track of replies (receive) */ -struct p9_rdma_req; struct p9_rdma_context { struct ib_cqe cqe; dma_addr_t busa; @@ -112,7 +113,7 @@ struct p9_rdma_context { }; /** - * p9_rdma_opts - Collection of mount options + * struct p9_rdma_opts - Collection of mount options * @port: port of connection * @sq_depth: The requested depth of the SQ. This really doesn't need * to be any deeper than the number of threads used in the client -- cgit From e0a7f1fe0c60ae48101b0170a8780c1e40ffb94d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:14:58 +0200 Subject: net: can: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Oliver Hartkopp Cc: Marc Kleine-Budde Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/can/af_can.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 128d37a4c2e0..5c06404bdf3e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -410,6 +410,7 @@ static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask, /** * can_rx_register - subscribe CAN frames from a specific interface + * @net: the applicable net namespace * @dev: pointer to netdevice (NULL => subcribe from 'all' CAN devices list) * @can_id: CAN identifier (see description) * @mask: CAN mask (see description) @@ -498,6 +499,7 @@ static void can_rx_delete_receiver(struct rcu_head *rp) /** * can_rx_unregister - unsubscribe CAN frames from a specific interface + * @net: the applicable net namespace * @dev: pointer to netdevice (NULL => unsubscribe from 'all' CAN devices list) * @can_id: CAN identifier * @mask: CAN mask -- cgit From 8842500dd056b8ef9da77ba2bfc3275fd7e0a98f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:14:59 +0200 Subject: net: core: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c02bae927812..eab4ebe3c21c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7908,6 +7908,7 @@ EXPORT_SYMBOL(netdev_bonding_info_change); /** * netdev_get_xmit_slave - Get the xmit slave of master device + * @dev: device * @skb: The packet * @all_slaves: assume all the slaves are active * -- cgit From d0b1101bb5c1224881bb58460311d458ff1350d0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:00 +0200 Subject: net: dccp: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Gerrit Renker Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dccp/ccids/lib/packet_history.c | 2 ++ net/dccp/feat.c | 6 ++++++ net/dccp/input.c | 1 + net/dccp/ipv4.c | 2 ++ net/dccp/options.c | 4 ++++ net/dccp/timer.c | 2 ++ 6 files changed, 17 insertions(+) (limited to 'net') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 0bef57b908fb..af08e2df7108 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -365,6 +365,7 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against + * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h) @@ -374,6 +375,7 @@ static inline struct tfrc_rx_hist_entry * /** * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry + * @h: The non-empty RX history object */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 9c3b5e056234..afc071ea1271 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -165,6 +165,8 @@ static const struct { /** * dccp_feat_index - Hash function to map feature number into array position + * @feat_num: feature to hash, one of %dccp_feature_numbers + * * Returns consecutive array index or -1 if the feature is not understood. */ static int dccp_feat_index(u8 feat_num) @@ -567,6 +569,8 @@ cloning_failed: /** * dccp_feat_valid_nn_length - Enforce length constraints on NN options + * @feat_num: feature to return length of, one of %dccp_feature_numbers + * * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only, * incoming options are accepted as long as their values are valid. */ @@ -1429,6 +1433,8 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, /** * dccp_feat_init - Seed feature negotiation with host-specific defaults + * @sk: Socket to initialize. + * * This initialises global defaults, depending on the value of the sysctls. * These can later be overridden by registering changes via setsockopt calls. * The last link in the chain is finalise_settings, to make sure that between diff --git a/net/dccp/input.c b/net/dccp/input.c index 6dce68a55964..bd9cfdb67436 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -715,6 +715,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** * dccp_sample_rtt - Validate and finalise computation of RTT sample + * @sk: socket structure * @delta: number of microseconds between packet and acknowledgment * * The routine is kept generic to work in different contexts. It should be diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d19557c6d04b..a7e989919c53 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -694,6 +694,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); /** * dccp_invalid_packet - check for malformed packets + * @skb: Packet to validate + * * Implements RFC 4340, 8.5: Step 1: Check header basics * Packets that fail these checks are ignored and do not receive Resets. */ diff --git a/net/dccp/options.c b/net/dccp/options.c index 3b42f5c6a63d..daa9eed92646 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -43,6 +43,7 @@ u64 dccp_decode_value_var(const u8 *bf, const u8 len) * dccp_parse_options - Parse DCCP options present in @skb * @sk: client|server|listening dccp socket (when @dreq != NULL) * @dreq: request socket to use during connection setup, or NULL + * @skb: frame to parse */ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct sk_buff *skb) @@ -471,6 +472,8 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) /** * dccp_insert_option_mandatory - Mandatory option (5.8.2) + * @skb: frame into which to insert option + * * Note that since we are using skb_push, this function needs to be called * _after_ inserting the option it is supposed to influence (stack order). */ @@ -486,6 +489,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb) /** * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb + * @skb: frame to insert feature negotiation option into * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R * @feat: one out of %dccp_feature_numbers * @val: NN value or SP array (preferred element first) to copy diff --git a/net/dccp/timer.c b/net/dccp/timer.c index c0b3672637c4..0e06dfc32273 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -216,6 +216,8 @@ out: /** * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface + * @data: Socket to act on + * * See the comments above %ccid_dequeueing_decision for supported modes. */ static void dccp_write_xmitlet(unsigned long data) -- cgit From aff53b23a9a74cf6729a6022e5dd22dd698bd9d9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:01 +0200 Subject: net: decnet: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9eb7e4b62d9b..4cac31d22a50 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -494,6 +494,8 @@ static int dn_return_long(struct sk_buff *skb) /** * dn_route_rx_packet - Try and find a route for an incoming packet + * @net: The applicable net namespace + * @sk: Socket packet transmitted on * @skb: The packet to find a route for * * Returns: result of input function if route is found, error code otherwise -- cgit From 3628e3cbf9edc8381961394ccd6fb0bf049fdb69 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:02 +0200 Subject: net: ipv4: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Paul Moore Cc: Alexey Kuznetsov Cc: Eric Dumazet Signed-off-by: Andrew Lunn Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 6 ++++-- net/ipv4/ipmr.c | 3 +++ net/ipv4/tcp_input.c | 1 - net/ipv4/tcp_output.c | 2 ++ net/ipv4/tcp_timer.c | 2 +- net/ipv4/udp.c | 6 +++--- 6 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 0f1b9065c0a6..2eb71579f4d2 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -283,7 +283,7 @@ static int cipso_v4_cache_check(const unsigned char *key, /** * cipso_v4_cache_add - Add an entry to the CIPSO cache - * @skb: the packet + * @cipso_ptr: pointer to CIPSO IP option * @secattr: the packet's security attributes * * Description: @@ -1535,6 +1535,7 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) /** * cipso_v4_validate - Validate a CIPSO option + * @skb: the packet * @option: the start of the option, on error it is set to point to the error * * Description: @@ -2066,7 +2067,7 @@ void cipso_v4_sock_delattr(struct sock *sk) /** * cipso_v4_req_delattr - Delete the CIPSO option from a request socket - * @reg: the request socket + * @req: the request socket * * Description: * Removes the CIPSO option from a request socket, if present. @@ -2158,6 +2159,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) /** * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet * @skb: the packet + * @doi_def: the DOI structure * @secattr: the security attributes * * Description: diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f5c7a58844a4..678639c01e48 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -636,7 +636,10 @@ static int call_ipmr_mfc_entry_notifiers(struct net *net, /** * vif_delete - Delete a VIF entry + * @mrt: Table to delete from + * @vifi: VIF identifier to delete * @notify: Set to 1, if the caller is a notifier_call + * @head: if unregistering the VIF, place it on this queue */ static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 61c808864f6b..b03ca68d4111 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4448,7 +4448,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket - * @dest: destination queue * @to: prior buffer * @from: buffer to add in queue * @fragstolen: pointer to boolean diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d563efcee0d..dc0117013ba5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3337,6 +3337,8 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer + * foc: cookie for tcp fast open + * synack_type: Type of synback to prepare * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ada046f425d2..0c08c420fbc2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -314,7 +314,7 @@ out: /** * tcp_delack_timer() - The TCP delayed ACK timeout handler - * @data: Pointer to the current socket. (gets casted to struct sock *) + * @t: Pointer to the timer. (gets casted to struct sock *) * * This function gets (indirectly) called when the kernel timer for a TCP packet * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 31530129f137..073d346f515c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2743,9 +2743,9 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, #endif /** * udp_poll - wait for a UDP event. - * @file - file struct - * @sock - socket - * @wait - poll table + * @file: - file struct + * @sock: - socket + * @wait: - poll table * * This is same as datagram poll, except for the special case of * blocking sockets. If application is using a blocking fd -- cgit From b51cd7c834dba0ec9300337e16e5aa5bf65bd04c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:03 +0200 Subject: net: ipv6: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 1 - net/ipv6/ip6_output.c | 6 ++++-- net/ipv6/ip6_tunnel.c | 10 ++++++---- net/ipv6/udp.c | 3 +++ 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index e9b366994475..374105e4394f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -1232,7 +1232,6 @@ static void ipv6_renew_option(int renewtype, * @opt: original options * @newtype: option type to replace in @opt * @newopt: new option of type @newtype to replace (user-mem) - * @newoptlen: length of @newopt * * Returns a new set of options which is a copy of @opt with the * option type @newtype replaced with @newopt. diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8a8c2d0cfcc8..c78e67d7747f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1118,6 +1118,7 @@ out_err_release: /** * ip6_dst_lookup - perform route lookup on flow + * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @dst: pointer to dst_entry * for result * @fl6: flow to lookup @@ -1136,6 +1137,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec + * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup @@ -1202,11 +1204,11 @@ EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); * @skb: Packet for which lookup is done * @dev: Tunnel device * @net: Network namespace of tunnel device - * @sk: Socket which provides route info + * @sock: Socket which provides route info * @saddr: Memory to store the src ip address * @info: Tunnel information * @protocol: IP protocol - * @use_cahce: Flag to enable cache usage + * @use_cache: Flag to enable cache usage * This function performs a route lookup on a tunnel * * It returns a valid dst pointer and stores src address to be used in diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a18c378ca5f4..f635914f42ec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -124,8 +124,12 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) return &dev->stats; } +#define for_each_ip6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses + * @net: network namespace * @link: ifindex of underlying interface * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point @@ -136,9 +140,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * else %NULL **/ -#define for_each_ip6_tunnel_rcu(start) \ - for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) - static struct ip6_tnl * ip6_tnl_lookup(struct net *net, int link, const struct in6_addr *remote, const struct in6_addr *local) @@ -302,8 +303,8 @@ out: /** * ip6_tnl_create - create a new tunnel + * @net: network namespace * @p: tunnel parameters - * @pt: pointer to new tunnel * * Description: * Create tunnel matching given parameters. @@ -351,6 +352,7 @@ failed: /** * ip6_tnl_locate - find or create tunnel matching given parameters + * @net: network namespace * @p: tunnel parameters * @create: != 0 if allowed to create new tunnel if no match found * diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7d4151747340..38c0d9350c6b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1059,6 +1059,9 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, * @sk: socket we are sending on * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) + * @saddr: source address + * @daddr: destination address + * @len: length of packet */ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, const struct in6_addr *saddr, -- cgit From 74c950c966c180c599d0af3043ab8959e74493d5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:04 +0200 Subject: net: llc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/llc/af_llc.c | 1 - net/llc/llc_conn.c | 7 ++++--- net/llc/llc_input.c | 1 + net/llc/llc_pdu.c | 2 +- net/llc/llc_sap.c | 3 +++ 5 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6e53e43c1907..6140a3e46c26 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -984,7 +984,6 @@ out: * llc_ui_getname - return the address info of a socket * @sock: Socket to get address of. * @uaddr: Address structure to return information. - * @uaddrlen: Length of address structure. * @peer: Does user want local or remote address information. * * Return the address information of a socket. diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 7b620acaca9e..1144cda2a0fc 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -284,8 +284,8 @@ out:; /** * llc_conn_remove_acked_pdus - Removes acknowledged pdus from tx queue * @sk: active connection - * nr: NR - * how_many_unacked: size of pdu_unack_q after removing acked pdus + * @nr: NR + * @how_many_unacked: size of pdu_unack_q after removing acked pdus * * Removes acknowledged pdus from transmit queue (pdu_unack_q). Returns * the number of pdus that removed from queue. @@ -906,6 +906,7 @@ static void llc_sk_init(struct sock *sk) /** * llc_sk_alloc - Allocates LLC sock + * @net: network namespace * @family: upper layer protocol family * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) * @@ -951,7 +952,7 @@ void llc_sk_stop_all_timers(struct sock *sk, bool sync) /** * llc_sk_free - Frees a LLC socket - * @sk - socket to free + * @sk: - socket to free * * Frees a LLC socket */ diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 82cb93f66b9b..c309b72a5877 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -144,6 +144,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) * @skb: received pdu * @dev: device that receive pdu * @pt: packet type + * @orig_dev: the original receive net device * * When the system receives a 802.2 frame this function is called. It * checks SAP and connection of received pdu and passes frame to diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index 2e6cb79196bb..792d195c8bae 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -25,7 +25,7 @@ void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type) /** * pdu_set_pf_bit - sets poll/final bit in LLC header - * @pdu_frame: input frame that p/f bit must be set into it. + * @skb: Frame to set bit in * @bit_value: poll/final bit (0 or 1). * * This function sets poll/final bit in LLC header (based on type of PDU). diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index be419062e19a..6805ce43a055 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -37,6 +37,7 @@ static int llc_mac_header_len(unsigned short devtype) /** * llc_alloc_frame - allocates sk_buff for frame + * @sk: socket to allocate frame to * @dev: network device this skb will be sent over * @type: pdu type to allocate * @data_size: data size to allocate @@ -273,6 +274,7 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, * llc_sap_rcv - sends received pdus to the sap state machine * @sap: current sap component structure. * @skb: received frame. + * @sk: socket to associate to frame * * Sends received pdus to the sap state machine. */ @@ -379,6 +381,7 @@ static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP * @laddr: address of local LLC (MAC + SAP) + * @skb: PDU to deliver * * Search socket list of the SAP and finds connections with same sap. * Deliver clone to each. -- cgit From 9fd00b4d0eccdb859ba8a0c3228fadfb081441a0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:05 +0200 Subject: net: mac80211: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Johannes Berg Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/mac80211/mesh_pathtbl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 117519bf33d6..fe4e853c61f4 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -72,7 +72,6 @@ static void mesh_table_free(struct mesh_table *tbl) } /** - * * mesh_path_assign_nexthop - update mesh path next hop * * @mpath: mesh path to update @@ -140,7 +139,6 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, } /** - * * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * * This function is used to transfer or copy frames from an unresolved mpath to @@ -152,7 +150,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, * * The gate mpath must be an active mpath with a valid mpath->next_hop. * - * @mpath: An active mpath the frames will be sent to (i.e. the gate) + * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) * @from_mpath: The failed mpath * @copy: When true, copy all the frames to the new mpath queue. When false, * move them. -- cgit From 3db86c397f608b58b842d7f0f3d7ce305b46e6ad Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:06 +0200 Subject: net: netfilter: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_tables_api.c | 8 ++++---- net/netfilter/nft_set_pipapo.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f33d72c5b06e..358108ff0833 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1006,7 +1006,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) * * @skb: skb that causes the clash * @h: tuplehash of the clashing entry already in table - * @hash_reply: hash slot for reply direction + * @reply_hash: hash slot for reply direction * * A conntrack entry can be inserted to the connection tracking table * if there is no existing entry with an identical tuple. diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f96785586f64..6708a4f2eec8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2375,7 +2375,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, /** * nft_register_expr - register nf_tables expr type - * @ops: expr type + * @type: expr type * * Registers the expr type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. @@ -2394,7 +2394,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr); /** * nft_unregister_expr - unregister nf_tables expr type - * @ops: expr type + * @type: expr type * * Unregisters the expr typefor use with nf_tables. */ @@ -5595,7 +5595,7 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, /** * nft_register_obj- register nf_tables stateful object type - * @obj: object type + * @obj_type: object type * * Registers the object type for use with nf_tables. Returns zero on * success or a negative errno code otherwise. @@ -5614,7 +5614,7 @@ EXPORT_SYMBOL_GPL(nft_register_obj); /** * nft_unregister_obj - unregister nf_tables object type - * @obj: object type + * @obj_type: object type * * Unregisters the object type for use with nf_tables. */ diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 313de1d73168..cc6082a5f7ad 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -401,7 +401,7 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, * nft_pipapo_lookup() - Lookup function * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @key: nftables API element representation containing key data * @ext: nftables API extension pointer, filled with matching reference * * For more details, see DOC: Theory of Operation. @@ -1075,7 +1075,7 @@ out: * @m: Matching data, including mapping table * @map: Table of rule maps: array of first rule and amount of rules * in next field a given rule maps to, for each field - * @ext: For last field, nft_set_ext pointer matching rules map to + * @e: For last field, nft_set_ext pointer matching rules map to */ static void pipapo_map(struct nft_pipapo_match *m, union nft_pipapo_map_bucket map[NFT_PIPAPO_MAX_FIELDS], @@ -1099,7 +1099,7 @@ static void pipapo_map(struct nft_pipapo_match *m, /** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions - * @bsize_max Maximum bucket size, scratch maps cover two buckets + * @bsize_max: Maximum bucket size, scratch maps cover two buckets * * Return: 0 on success, -ENOMEM on failure. */ @@ -1447,7 +1447,7 @@ static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules, /** * pipapo_drop() - Delete entry from lookup and mapping tables, given rule map * @m: Matching data - * @rulemap Table of rule maps, arrays of first rule and amount of rules + * @rulemap: Table of rule maps, arrays of first rule and amount of rules * in next field a given entry maps to, for each field * * For each rule in lookup table buckets mapping to this set of rules, drop -- cgit From 26c3baaa0956ffabc1e3ede7f6933bd0dc9cb373 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:07 +0200 Subject: net: netlabel: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Paul Moore Signed-off-by: Andrew Lunn Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_domainhash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index a1f2320ecc16..d07de2c0fbc7 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -92,7 +92,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) /** * netlbl_domhsh_hash - Hashing function for the domain hash table - * @domain: the domain name to hash + * @key: the domain name to hash * * Description: * This is the hashing function for the domain hash table, it returns the -- cgit From ffbab1c93b52b3405183eb3643664d8a2e9b38ea Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:08 +0200 Subject: net: nfc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/nfc/core.c | 3 +-- net/nfc/nci/core.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index c5f9c3ee82f8..eb377f87bcae 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -704,7 +704,6 @@ EXPORT_SYMBOL(nfc_tm_deactivated); * nfc_alloc_send_skb - allocate a skb for data exchange responses * * @size: size to allocate - * @gfp: gfp flags */ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, unsigned int flags, unsigned int size, @@ -749,7 +748,7 @@ EXPORT_SYMBOL(nfc_alloc_recv_skb); * * @dev: The nfc device that found the targets * @targets: array of nfc targets found - * @ntargets: targets array size + * @n_targets: targets array size * * The device driver must call this function when one or many nfc targets * are found. After calling this function, the device driver must stop diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7cd524884304..f7b7dc5fe84a 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1182,7 +1182,7 @@ EXPORT_SYMBOL(nci_free_device); /** * nci_register_device - register a nci device in the nfc subsystem * - * @dev: The nci device to register + * @ndev: The nci device to register */ int nci_register_device(struct nci_dev *ndev) { @@ -1246,7 +1246,7 @@ EXPORT_SYMBOL(nci_register_device); /** * nci_unregister_device - unregister a nci device in the nfc subsystem * - * @dev: The nci device to unregister + * @ndev: The nci device to unregister */ void nci_unregister_device(struct nci_dev *ndev) { -- cgit From 966785142342f473015f92be9e62554a29105599 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:09 +0200 Subject: net: openvswitch: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Pravin B Shelar Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 6 +++--- net/openvswitch/vport.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 79252d4887ff..9d3e50c4d29f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1763,11 +1763,11 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * does not include any don't care bit. * @net: Used to determine per-namespace field support. * @match: receives the extracted flow match information. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. The fields should of the packet that triggered the creation * of this flow. - * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink - * attribute specifies the mask field of the wildcarded flow. + * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* + * Netlink attribute specifies the mask field of the wildcarded flow. * @log: Boolean to allow kernel error logging. Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 47febb4504f0..0d44c5c013fa 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -87,6 +87,7 @@ EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** * ovs_vport_locate - find a port that has already been created * + * @net: network namespace * @name: name of port to find * * Must be called with ovs or RCU read lock. @@ -418,7 +419,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) * * @vport: vport that received the packet * @skb: skb that was received - * @tun_key: tunnel (if any) that carried packet + * @tun_info: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. -- cgit From 76f2fe73c5b888fd009aa16efad9a6b563a1aa1f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:10 +0200 Subject: net: rxrpc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: David Howells Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 394189b81849..cd7d0d204c74 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -267,7 +267,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call - * @intr: The call is interruptible + * @interruptibility: The call is interruptible, or can be canceled. * @debug_id: The debug ID for tracing to be assigned to the call * * Allow a kernel service to begin a call on the nominated socket. This just -- cgit From 90ac5d0301db7cefc4129f89a858f66913a1c8f4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:11 +0200 Subject: net: sched: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/sched/em_canid.c | 1 + net/sched/ematch.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index b9a94fdf9397..5ea84decec19 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -40,6 +40,7 @@ struct canid_match { /** * em_canid_get_id() - Extracts Can ID out of the sk_buff structure. + * @skb: buffer to extract Can ID from */ static canid_t em_canid_get_id(struct sk_buff *skb) { diff --git a/net/sched/ematch.c b/net/sched/ematch.c index dd3b8c11a2e0..f885bea5b452 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -389,7 +389,6 @@ EXPORT_SYMBOL(tcf_em_tree_validate); /** * tcf_em_tree_destroy - destroy an ematch tree * - * @tp: classifier kind handle * @tree: ematch tree to be deleted * * This functions destroys an ematch tree previously created by @@ -425,7 +424,7 @@ EXPORT_SYMBOL(tcf_em_tree_destroy); * tcf_em_tree_dump - dump ematch tree into a rtnl message * * @skb: skb holding the rtnl message - * @t: ematch tree to be dumped + * @tree: ematch tree to be dumped * @tlv: TLV type to be used to encapsulate the tree * * This function dumps a ematch tree into a rtnl message. It is valid to -- cgit From 9a8ad9ac810aaddbb805c048511624a4972b24cc Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:12 +0200 Subject: net: socket: Move kerneldoc next to function it documents Fix the warning "Function parameter or member 'inode' not described in '__sock_release'' due to the kerneldoc being placed before __sock_release() not sock_release(), which does not take an inode parameter. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/socket.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index d87812a9ed4b..770503c4ca76 100644 --- a/net/socket.c +++ b/net/socket.c @@ -586,15 +586,6 @@ struct socket *sock_alloc(void) } EXPORT_SYMBOL(sock_alloc); -/** - * sock_release - close a socket - * @sock: socket to close - * - * The socket is released from the protocol stack if it has a release - * callback, and the inode is then released if the socket is bound to - * an inode not a file. - */ - static void __sock_release(struct socket *sock, struct inode *inode) { if (sock->ops) { @@ -620,6 +611,14 @@ static void __sock_release(struct socket *sock, struct inode *inode) sock->file = NULL; } +/** + * sock_release - close a socket + * @sock: socket to close + * + * The socket is released from the protocol stack if it has a release + * callback, and the inode is then released if the socket is bound to + * an inode not a file. + */ void sock_release(struct socket *sock) { __sock_release(sock, NULL); -- cgit From c8af73f0b23b75468e0f511287d15bf3051e88c8 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:13 +0200 Subject: net: switchdev: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Jiri Pirko Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f25604d68337..865f3e037425 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -304,8 +304,8 @@ static int switchdev_port_obj_add_defer(struct net_device *dev, * switchdev_port_obj_add - Add port object * * @dev: port device - * @id: object ID * @obj: object to add + * @extack: netlink extended ack * * Use a 2-phase prepare-commit transaction model to ensure * system is not left in a partially updated state due to @@ -357,7 +357,6 @@ static int switchdev_port_obj_del_defer(struct net_device *dev, * switchdev_port_obj_del - Delete port object * * @dev: port device - * @id: object ID * @obj: object to delete * * rtnl_lock must be held and must not be in atomic section, -- cgit From d8141208b032eaee0efeacaadf1734f65db73ac5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:14 +0200 Subject: net: tipc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Andrew Lunn Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- net/tipc/discover.c | 5 ++--- net/tipc/link.c | 6 +++--- net/tipc/msg.c | 2 +- net/tipc/node.c | 4 ++-- net/tipc/socket.c | 8 +++----- net/tipc/udp_media.c | 2 +- 7 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e366ec9a7e4d..808b147df7d5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -595,7 +595,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface - * @buf: the received packet + * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bfe43da127c0..d4ecacddb40c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -74,7 +74,7 @@ struct tipc_discoverer { /** * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace - * @type: message type (request or response) + * @mtyp: message type (request or response) * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, @@ -339,7 +339,7 @@ exit: * @net: the applicable net namespace * @b: ptr to bearer issuing requests * @dest: destination address for request messages - * @dest_domain: network domain to which links can be established + * @skb: pointer to created frame * * Returns 0 if successful, otherwise -errno. */ @@ -393,7 +393,6 @@ void tipc_disc_delete(struct tipc_discoverer *d) * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace * @b: ptr to bearer issuing requests - * @dest_domain: network domain to which links can be established */ void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { diff --git a/net/tipc/link.c b/net/tipc/link.c index f1d9c33dae72..d46d4ee5c4fd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -445,7 +445,7 @@ u32 tipc_link_state(struct tipc_link *l) /** * tipc_link_create - create a new link - * @n: pointer to associated node + * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link @@ -530,7 +530,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast - * @n: pointer to associated node + * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for delivery @@ -989,7 +989,7 @@ void tipc_link_reset(struct tipc_link *l) /** * tipc_link_xmit(): enqueue buffer list according to queue situation - * @link: link to use + * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 01b64869a173..848fae674532 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -202,7 +202,7 @@ err: /** * tipc_msg_append(): Append data to tail of an existing buffer queue - * @hdr: header to be used + * @_hdr: header to be used * @m: the data to be appended * @mss: max allowable size of buffer * @dlen: size of data to be appended diff --git a/net/tipc/node.c b/net/tipc/node.c index 030a51c4d1fa..4edcee3088da 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1515,7 +1515,7 @@ static void node_lost_contact(struct tipc_node *n, * tipc_node_get_linkname - get the name of a link * * @bearer_id: id of the bearer - * @node: peer node address + * @addr: peer node address * @linkname: link name output buffer * * Returns 0 on success @@ -2022,7 +2022,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet - * @bearer: pointer to bearer message arrived on + * @b: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a94f38333698..fc388cef6471 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -711,7 +711,6 @@ exit: * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address - * @uaddr_len: area for returned length of socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * * Returns 0 on success, errno otherwise @@ -1053,7 +1052,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /** * tipc_send_group_bcast - send message to all members in communication group - * @sk: socket structure + * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup @@ -1673,7 +1672,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, /** * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @hdr: received message header + * @skb: received message * * Note: Address is not captured if not requested by receiver. */ @@ -2095,7 +2094,6 @@ static void tipc_write_space(struct sock *sk) /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket - * @len: the length of messages */ static void tipc_data_ready(struct sock *sk) { @@ -2677,7 +2675,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) /** * tipc_accept - wait for connection request * @sock: listening socket - * @newsock: new socket that is to be connected + * @new_sock: new socket that is to be connected * @flags: file-related flags associated with socket * * Returns 0 on success, errno otherwise diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 28a283f26a8d..d91b7c543e39 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -565,7 +565,7 @@ msg_full: /** * tipc_parse_udp_addr - build udp media address from netlink data - * @nlattr: netlink attribute containing sockaddr storage aligned address + * @nla: netlink attribute containing sockaddr storage aligned address * @addr: tipc media address to fill with address, port and protocol type * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ -- cgit From 726e6af9af44b39cb0860667095fbce3a489ef42 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:15 +0200 Subject: net: wireless: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Johannes Berg Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/wireless/reg.c | 4 +++- net/wireless/wext-compat.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0d74a31ef0ab..35b8847a2f6d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2384,7 +2384,7 @@ static void reg_set_request_processed(void) /** * reg_process_hint_core - process core regulatory requests - * @pending_request: a pending core regulatory request + * @core_request: a pending core regulatory request * * The wireless subsystem can use this function to process * a regulatory request issued by the regulatory core. @@ -2493,6 +2493,7 @@ __reg_process_hint_driver(struct regulatory_request *driver_request) /** * reg_process_hint_driver - process driver regulatory requests + * @wiphy: the wireless device for the regulatory request * @driver_request: a pending driver regulatory request * * The wireless subsystem can use this function to process @@ -2593,6 +2594,7 @@ __reg_process_hint_country_ie(struct wiphy *wiphy, /** * reg_process_hint_country_ie - process regulatory requests from country IEs + * @wiphy: the wireless device for the regulatory request * @country_ie_request: a regulatory request from a country IE * * The wireless subsystem can use this function to process diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index cac9e28d852b..aa918d7ff6bd 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -220,7 +220,6 @@ EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); /** * cfg80211_wext_freq - get wext frequency for non-"auto" - * @dev: the net device * @freq: the wext freq encoding * * Returns a frequency, or a negative error code, or 0 for auto. -- cgit From 62c89238b182101dab0ae106f4c68061512c41f5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:16 +0200 Subject: net: x25: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Andrew Hendry Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- net/x25/x25_link.c | 2 +- net/x25/x25_route.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 7d02532aad0d..fdae054b7dc1 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -270,7 +270,7 @@ void x25_link_device_up(struct net_device *dev) /** * __x25_remove_neigh - remove neighbour from x25_neigh_list - * @nb - neigh to remove + * @nb: - neigh to remove * * Remove neighbour from x25_neigh_list. If it was there. * Caller must hold x25_neigh_list_lock. diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index b8e94d58d0f1..00e46c9a5280 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -142,7 +142,7 @@ struct net_device *x25_dev_get(char *devname) /** * x25_get_route - Find a route given an X.25 address. - * @addr - address to find a route for + * @addr: - address to find a route for * * Find a route given an X.25 address. */ -- cgit From c40f4e50b6cfc7c66f69d12c6b3fbcd954f1ded5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sat, 11 Jul 2020 00:55:03 +0300 Subject: net: sched: Pass qdisc reference in struct flow_block_offload Previously, shared blocks were only relevant for the pseudo-qdiscs ingress and clsact. Recently, a qevent facility was introduced, which allows to bind blocks to well-defined slots of a qdisc instance. RED in particular got two qevents: early_drop and mark. Drivers that wish to offload these blocks will be sent the usual notification, and need to know which qdisc it is related to. To that end, extend flow_block_offload with a "sch" pointer, and initialize as appropriate. This prompts changes in the indirect block facility, which now tracks the scheduler in addition to the netdevice. Update signatures of several functions similarly. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/core/flow_offload.c | 12 +++++++----- net/netfilter/nf_flow_table_offload.c | 2 +- net/netfilter/nf_tables_offload.c | 2 +- net/sched/cls_api.c | 16 +++++++++------- 4 files changed, 18 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index b739cfab796e..b8cf6ff5f961 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -429,7 +429,7 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, struct Qdisc *sch, void *data, void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -437,6 +437,7 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block, flow_block->indr.data = data; flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; + flow_block->indr.sch = sch; flow_block->indr.cleanup = cleanup; } @@ -444,7 +445,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv), struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, + struct Qdisc *sch, void *data, void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -454,7 +456,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, if (IS_ERR(block_cb)) goto out; - flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); + flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: @@ -462,7 +464,7 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) @@ -471,7 +473,7 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo, data, cleanup); + this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); mutex_unlock(&flow_indr_block_lock); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 5fff1e040168..2a6993fa40d7 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -964,7 +964,7 @@ static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); - return flow_indr_dev_setup_offload(dev, TC_SETUP_FT, flowtable, bo, + return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo, nf_flow_table_indr_cleanup); } diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index c7cf1cde46de..9ef37c1b7b3b 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -312,7 +312,7 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack); - err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, basechain, &bo, + err = flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_BLOCK, basechain, &bo, nft_indr_block_cleanup); if (err < 0) return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e617f3e27ec0..322b279154de 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -622,7 +622,7 @@ static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); static void tcf_block_offload_init(struct flow_block_offload *bo, - struct net_device *dev, + struct net_device *dev, struct Qdisc *sch, enum flow_block_command command, enum flow_block_binder_type binder_type, struct flow_block *flow_block, @@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo, bo->block = flow_block; bo->block_shared = shared; bo->extack = extack; + bo->sch = sch; INIT_LIST_HEAD(&bo->cb_list); } @@ -644,10 +645,11 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) { struct tcf_block *block = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; + struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; struct flow_block_offload bo; - tcf_block_offload_init(&bo, dev, FLOW_BLOCK_UNBIND, + tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, &block->flow_block, tcf_block_shared(block), &extack); @@ -666,14 +668,14 @@ static bool tcf_block_offload_in_use(struct tcf_block *block) } static int tcf_block_offload_cmd(struct tcf_block *block, - struct net_device *dev, + struct net_device *dev, struct Qdisc *sch, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { struct flow_block_offload bo = {}; - tcf_block_offload_init(&bo, dev, command, ei->binder_type, + tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type, &block->flow_block, tcf_block_shared(block), extack); @@ -690,7 +692,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block, return tcf_block_setup(block, &bo); } - flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block, &bo, + flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo, tc_block_indr_cleanup); tcf_block_setup(block, &bo); @@ -717,7 +719,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, goto err_unlock; } - err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); + err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) @@ -744,7 +746,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, int err; down_write(&block->cb_lock); - err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); + err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; up_write(&block->cb_lock); -- cgit From ce1e2a776ffcd5ce1570bf8f077ea0c26c1d8a8d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 13 Jul 2020 22:23:44 +0800 Subject: net: make symbol 'flush_works' static The sparse tool complains as follows: net/core/dev.c:5594:1: warning: symbol '__pcpu_scope_flush_works' was not declared. Should it be static? 'flush_works' is not used outside of dev.c, so marks it static. Fixes: 41852497a9205 ("net: batch calls to flush_all_backlogs()") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index eab4ebe3c21c..b61075828358 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5591,7 +5591,7 @@ void netif_receive_skb_list(struct list_head *head) } EXPORT_SYMBOL(netif_receive_skb_list); -DEFINE_PER_CPU(struct work_struct, flush_works); +static DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) -- cgit From 5d037b4d3df7b77ecd22fa8e10f5000bdc42cc8b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 13 Jul 2020 18:20:14 +0300 Subject: devlink: Fix use-after-free when destroying health reporters Dereferencing the reporter after it was destroyed in order to unlock the reporters lock results in a use-after-free [1]. Fix this by storing a pointer to the lock in a local variable before destroying the reporter. [1] ================================================================== BUG: KASAN: use-after-free in devlink_health_reporter_destroy+0x15c/0x1b0 net/core/devlink.c:5476 Read of size 8 at addr ffff8880650fd020 by task syz-executor.1/904 CPU: 0 PID: 904 Comm: syz-executor.1 Not tainted 5.8.0-rc2+ #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 devlink_health_reporter_destroy+0x15c/0x1b0 net/core/devlink.c:5476 nsim_dev_health_exit+0x8b/0xe0 drivers/net/netdevsim/health.c:317 nsim_dev_reload_destroy+0x7f/0x110 drivers/net/netdevsim/dev.c:1134 nsim_dev_reload_down+0x6e/0xd0 drivers/net/netdevsim/dev.c:712 devlink_reload+0xc6/0x3b0 net/core/devlink.c:2952 devlink_nl_cmd_reload+0x2f1/0x7c0 net/core/devlink.c:2987 genl_family_rcv_msg_doit net/netlink/genetlink.c:691 [inline] genl_family_rcv_msg net/netlink/genetlink.c:736 [inline] genl_rcv_msg+0x611/0x9d0 net/netlink/genetlink.c:753 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469 genl_rcv+0x24/0x40 net/netlink/genetlink.c:764 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0x150/0x190 net/socket.c:672 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363 ___sys_sendmsg+0xff/0x170 net/socket.c:2417 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x4748ad Code: Bad RIP value. RSP: 002b:00007fd0358adc38 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000056bf00 RCX: 00000000004748ad RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000003 RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004d1a4b R14: 00007fd0358ae6b4 R15: 00007fd0358add80 Allocated by task 539: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467 kmalloc include/linux/slab.h:555 [inline] kzalloc include/linux/slab.h:669 [inline] __devlink_health_reporter_create+0x91/0x2f0 net/core/devlink.c:5359 devlink_health_reporter_create+0xa1/0x170 net/core/devlink.c:5431 nsim_dev_health_init+0x95/0x3a0 drivers/net/netdevsim/health.c:279 nsim_dev_probe+0xb1e/0xeb0 drivers/net/netdevsim/dev.c:1086 really_probe+0x287/0x6d0 drivers/base/dd.c:525 driver_probe_device+0xfe/0x1d0 drivers/base/dd.c:701 __device_attach_driver+0x21e/0x290 drivers/base/dd.c:807 bus_for_each_drv+0x161/0x1e0 drivers/base/bus.c:431 __device_attach+0x21a/0x360 drivers/base/dd.c:873 bus_probe_device+0x1e6/0x290 drivers/base/bus.c:491 device_add+0xaf2/0x1b00 drivers/base/core.c:2680 nsim_bus_dev_new drivers/net/netdevsim/bus.c:336 [inline] new_device_store+0x374/0x590 drivers/net/netdevsim/bus.c:215 bus_attr_store+0x75/0xa0 drivers/base/bus.c:122 sysfs_kf_write+0x113/0x170 fs/sysfs/file.c:138 kernfs_fop_write+0x25d/0x480 fs/kernfs/file.c:315 __vfs_write+0x7c/0x100 fs/read_write.c:495 vfs_write+0x265/0x5e0 fs/read_write.c:559 ksys_write+0x12d/0x250 fs/read_write.c:612 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 904: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1474 [inline] slab_free_freelist_hook mm/slub.c:1507 [inline] slab_free mm/slub.c:3072 [inline] kfree+0xe6/0x320 mm/slub.c:4063 devlink_health_reporter_free net/core/devlink.c:5449 [inline] devlink_health_reporter_put+0xb7/0xf0 net/core/devlink.c:5456 __devlink_health_reporter_destroy net/core/devlink.c:5463 [inline] devlink_health_reporter_destroy+0x11b/0x1b0 net/core/devlink.c:5475 nsim_dev_health_exit+0x8b/0xe0 drivers/net/netdevsim/health.c:317 nsim_dev_reload_destroy+0x7f/0x110 drivers/net/netdevsim/dev.c:1134 nsim_dev_reload_down+0x6e/0xd0 drivers/net/netdevsim/dev.c:712 devlink_reload+0xc6/0x3b0 net/core/devlink.c:2952 devlink_nl_cmd_reload+0x2f1/0x7c0 net/core/devlink.c:2987 genl_family_rcv_msg_doit net/netlink/genetlink.c:691 [inline] genl_family_rcv_msg net/netlink/genetlink.c:736 [inline] genl_rcv_msg+0x611/0x9d0 net/netlink/genetlink.c:753 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2469 genl_rcv+0x24/0x40 net/netlink/genetlink.c:764 netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1329 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1918 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0x150/0x190 net/socket.c:672 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2363 ___sys_sendmsg+0xff/0x170 net/socket.c:2417 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2450 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8880650fd000 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 32 bytes inside of 512-byte region [ffff8880650fd000, ffff8880650fd200) The buggy address belongs to the page: page:ffffea0001943f00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8880650ff800 head:ffffea0001943f00 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x100000000010200(slab|head) raw: 0100000000010200 ffffea0001a06a08 ffffea00010ad308 ffff88806c402500 raw: ffff8880650ff800 0000000000100009 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880650fcf00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880650fcf80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880650fd000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880650fd080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880650fd100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 3c5584bf0a04 ("devlink: Rework devlink health reporter destructor") Fixes: 15c724b997a8 ("devlink: Add devlink health port reporters API") Signed-off-by: Ido Schimmel Reviewed-by: Moshe Shemesh Reviewed-by: Jiri Pirko Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 20a83aace642..6335e1851088 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5471,9 +5471,11 @@ __devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink->reporters_lock); + struct mutex *lock = &reporter->devlink->reporters_lock; + + mutex_lock(lock); __devlink_health_reporter_destroy(reporter); - mutex_unlock(&reporter->devlink->reporters_lock); + mutex_unlock(lock); } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); @@ -5485,9 +5487,11 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); void devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink_port->reporters_lock); + struct mutex *lock = &reporter->devlink_port->reporters_lock; + + mutex_lock(lock); __devlink_health_reporter_destroy(reporter); - mutex_unlock(&reporter->devlink_port->reporters_lock); + mutex_unlock(lock); } EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); -- cgit From 67c2404922c2c3f9cc0898aafaa4e3bea2bde084 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 13 Jul 2020 19:57:04 +0300 Subject: net: dsa: felix: create a template for the DSA tags on xmit With this patch we try to kill 2 birds with 1 stone. First of all, some switches that use tag_ocelot.c don't have the exact same bitfield layout for the DSA tags. The destination ports field is different for Seville VSC9953 for example. So the choices are to either duplicate tag_ocelot.c into a new tag_seville.c (sub-optimal) or somehow take into account a supposed ocelot->dest_ports_offset when packing this field into the DSA injection header (again not ideal). Secondly, tag_ocelot.c already needs to memset a 128-bit area to zero and call some packing() functions of dubious performance in the fastpath. And most of the values it needs to pack are pretty much constant (BYPASS=1, SRC_PORT=CPU, DEST=port index). So it would be good if we could improve that. The proposed solution is to allocate a memory area per port at probe time, initialize that with the statically defined bits as per chip hardware revision, and just perform a simpler memcpy in the fastpath. Other alternatives have been analyzed, such as: - Create a separate tag_seville.c: too much code duplication for just 1 bit field difference. - Create a separate DSA_TAG_PROTO_SEVILLE under tag_ocelot.c, just like tag_brcm.c, which would have a separate .xmit function. Again, too much code duplication for just 1 bit field difference. - Allocate the template from the init function of the tag_ocelot.c module, instead of from the driver: couldn't figure out a method of accessing the correct port template corresponding to the correct tagger in the .xmit function. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_ocelot.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index b0c98ee4e13b..42f327c06dca 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -137,11 +137,10 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { struct dsa_port *dp = dsa_slave_to_port(netdev); - u64 bypass, dest, src, qos_class, rew_op; struct dsa_switch *ds = dp->ds; - int port = dp->index; struct ocelot *ocelot = ds->priv; - struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct ocelot_port *ocelot_port; + u64 qos_class, rew_op; u8 *injection; if (unlikely(skb_cow_head(skb, OCELOT_TAG_LEN) < 0)) { @@ -149,19 +148,15 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, return NULL; } - injection = skb_push(skb, OCELOT_TAG_LEN); + ocelot_port = ocelot->ports[dp->index]; - memset(injection, 0, OCELOT_TAG_LEN); + injection = skb_push(skb, OCELOT_TAG_LEN); - /* Set the source port as the CPU port module and not the NPI port */ - src = ocelot->num_phys_ports; - dest = BIT(port); - bypass = true; + memcpy(injection, ocelot_port->xmit_template, OCELOT_TAG_LEN); + /* Fix up the fields which are not statically determined + * in the template + */ qos_class = skb->priority; - - packing(injection, &bypass, 127, 127, OCELOT_TAG_LEN, PACK, 0); - packing(injection, &dest, 68, 56, OCELOT_TAG_LEN, PACK, 0); - packing(injection, &src, 46, 43, OCELOT_TAG_LEN, PACK, 0); packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0); if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { -- cgit From 55a48c7ec75ad8a1f4e39d271e2b5aee21150f65 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:36 +0800 Subject: ip_vti: not register vti_ipip_handler twice An xfrm_tunnel object is linked into the list when registering, so vti_ipip_handler can not be registered twice, otherwise its next pointer will be overwritten on the second time. So this patch is to define a new xfrm_tunnel object to register for AF_INET6. Fixes: e6ce64570f24 ("ip_vti: support IPIP6 tunnel processing") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c0b97b8f6fbd..3e5d54517145 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -484,6 +484,13 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .err_handler = vti4_err, .priority = 0, }; + +static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { + .handler = vti_rcv_tunnel, + .cb_handler = vti_rcv_cb, + .err_handler = vti4_err, + .priority = 0, +}; #endif static int __net_init vti_init_net(struct net *net) @@ -660,7 +667,7 @@ static int __init vti_init(void) if (err < 0) goto xfrm_tunnel_ipip_failed; #if IS_ENABLED(CONFIG_IPV6) - err = xfrm4_tunnel_register(&vti_ipip_handler, AF_INET6); + err = xfrm4_tunnel_register(&vti_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif @@ -676,7 +683,7 @@ static int __init vti_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) - xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); xfrm_tunnel_ipip6_failed: #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); @@ -699,7 +706,7 @@ static void __exit vti_fini(void) rtnl_link_unregister(&vti_link_ops); #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) #if IS_ENABLED(CONFIG_IPV6) - xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&vti_ipip6_handler, AF_INET6); #endif xfrm4_tunnel_deregister(&vti_ipip_handler, AF_INET); #endif -- cgit From a8757147905ea5adee78c7a813fc080a4124f248 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:37 +0800 Subject: ip6_vti: not register vti_ipv6_handler twice An xfrm6_tunnel object is linked into the list when registering, so vti_ipv6_handler can not be registered twice, otherwise its next pointer will be overwritten on the second time. So this patch is to define a new xfrm6_tunnel object to register for AF_INET. Fixes: 2ab110cbb0c0 ("ip6_vti: support IP6IP tunnel processing") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dfa93bc857d2..18ec4ab45be7 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1236,6 +1236,13 @@ static struct xfrm6_tunnel vti_ipv6_handler __read_mostly = { .err_handler = vti6_err, .priority = 0, }; + +static struct xfrm6_tunnel vti_ip6ip_handler __read_mostly = { + .handler = vti6_rcv_tunnel, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 0, +}; #endif /** @@ -1268,7 +1275,7 @@ static int __init vti6_tunnel_init(void) err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) goto vti_tunnel_ipv6_failed; - err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET); + err = xfrm6_tunnel_register(&vti_ip6ip_handler, AF_INET); if (err < 0) goto vti_tunnel_ip6ip_failed; #endif @@ -1282,7 +1289,7 @@ static int __init vti6_tunnel_init(void) rtnl_link_failed: #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); + err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); vti_tunnel_ipv6_failed: @@ -1306,7 +1313,7 @@ static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); -- cgit From 8b404f46dd6ab845d0fa794679329d4089281ccb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 13 Jul 2020 15:42:38 +0800 Subject: xfrm: interface: not xfrmi_ipv6/ipip_handler twice As we did in the last 2 patches for vti(6), this patch is to define a new xfrm_tunnel object 'xfrmi_ipip6_handler' to register for AF_INET6, and a new xfrm6_tunnel object 'xfrmi_ip6ip_handler' to register for AF_INET. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 96496fdfe3ce..63a52b4b6ea9 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -830,6 +830,13 @@ static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { .err_handler = xfrmi6_err, .priority = -1, }; + +static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { + .handler = xfrmi6_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi6_err, + .priority = -1, +}; #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { @@ -868,6 +875,13 @@ static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { .err_handler = xfrmi4_err, .priority = -1, }; + +static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = { + .handler = xfrmi4_rcv_tunnel, + .cb_handler = xfrmi_rcv_cb, + .err_handler = xfrmi4_err, + .priority = -1, +}; #endif static int __init xfrmi4_init(void) @@ -887,7 +901,7 @@ static int __init xfrmi4_init(void) err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; - err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET6); + err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif @@ -911,7 +925,7 @@ xfrm_proto_esp_failed: static void xfrmi4_fini(void) { #if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) - xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET6); + xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6); xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); @@ -936,7 +950,7 @@ static int __init xfrmi6_init(void) err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipv6_failed; - err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET); + err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ip6ip_failed; #endif @@ -960,7 +974,7 @@ xfrm_proto_esp_failed: static void xfrmi6_fini(void) { #if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) - xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET); + xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); -- cgit From a4fa458950b40d3849946daa32466392811a3716 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Jul 2020 12:31:45 -0700 Subject: bpfilter: Initialize pos variable Make sure 'pos' is initialized to zero before calling kernel_write(). Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module") Signed-off-by: Alexei Starovoitov --- net/bpfilter/bpfilter_kern.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 54f82c341e20..2c31e82cb953 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -48,6 +48,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.len = optlen; if (!bpfilter_ops.info.tgid) goto out; + pos = 0; n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { -- cgit From 9326e0f85bfaf0578d40f5357f8143ec857469f5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Jul 2020 18:26:44 +0900 Subject: bpfilter: Allow to build bpfilter_umh as a module without static library Originally, bpfilter_umh was linked with -static only when CONFIG_BPFILTER_UMH=y. Commit 8a2cc0505cc4 ("bpfilter: use 'userprogs' syntax to build bpfilter_umh") silently, accidentally dropped the CONFIG_BPFILTER_UMH=y test in the Makefile. Revive it in order to link it dynamically when CONFIG_BPFILTER_UMH=m. Since commit b1183b6dca3e ("bpfilter: check if $(CC) can link static libc in Kconfig"), the compiler must be capable of static linking to enable CONFIG_BPFILTER_UMH, but it requires more than needed. To loosen the compiler requirement, I changed the dependency as follows: depends on CC_CAN_LINK depends on m || CC_CAN_LINK_STATIC If CONFIG_CC_CAN_LINK_STATIC in unset, CONFIG_BPFILTER_UMH is restricted to 'm' or 'n'. In theory, CONFIG_CC_CAN_LINK is not required for CONFIG_BPFILTER_UMH=y, but I did not come up with a good way to describe it. Fixes: 8a2cc0505cc4 ("bpfilter: use 'userprogs' syntax to build bpfilter_umh") Reported-by: Michal Kubecek Signed-off-by: Masahiro Yamada Signed-off-by: Alexei Starovoitov Tested-by: Michal Kubecek Link: https://lore.kernel.org/bpf/20200701092644.762234-1-masahiroy@kernel.org --- net/bpfilter/Kconfig | 10 ++++++---- net/bpfilter/Makefile | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index 84015ef3ee27..73d0b12789f1 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -9,12 +9,14 @@ menuconfig BPFILTER if BPFILTER config BPFILTER_UMH tristate "bpfilter kernel module with user mode helper" - depends on CC_CAN_LINK_STATIC + depends on CC_CAN_LINK + depends on m || CC_CAN_LINK_STATIC default m help This builds bpfilter kernel module with embedded user mode helper - Note: your toolchain must support building static binaries, since - rootfs isn't mounted at the time when __init functions are called - and do_execv won't be able to find the elf interpreter. + Note: To compile this as built-in, your toolchain must support + building static binaries, since rootfs isn't mounted at the time + when __init functions are called and do_execv won't be able to find + the elf interpreter. endif diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index f23b53294fba..cdac82b8c53a 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -7,10 +7,12 @@ userprogs := bpfilter_umh bpfilter_umh-objs := main.o userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi +ifeq ($(CONFIG_BPFILTER_UMH), y) # builtin bpfilter_umh should be linked with -static # since rootfs isn't mounted at the time of __init # function is called and do_execv won't find elf interpreter userldflags += -static +endif $(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh -- cgit From 78c1b4fb0e3ed6907955abf3fc8eea74704fe072 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:50 +0200 Subject: bridge: mrp: Extend br_mrp for MRP interconnect This patch extends the 'struct br_mrp' to contain information regarding the MRP interconnect. It contains the following: - the interconnect port 'i_port', which is NULL if the node doesn't have a interconnect role - the interconnect id, which is similar with the ring id, but this field is also part of the MRP_InTest frames. - the interconnect role, which can be MIM or MIC. - the interconnect state, which can be open or closed. - the interconnect delayed_work for sending MRP_InTest frames and check for lost of continuity. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_private_mrp.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 315eb37d89f0..8841ba847fb2 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -12,8 +12,10 @@ struct br_mrp { struct net_bridge_port __rcu *p_port; struct net_bridge_port __rcu *s_port; + struct net_bridge_port __rcu *i_port; u32 ring_id; + u16 in_id; u16 prio; enum br_mrp_ring_role_type ring_role; @@ -21,6 +23,11 @@ struct br_mrp { enum br_mrp_ring_state_type ring_state; u32 ring_transitions; + enum br_mrp_in_role_type in_role; + u8 in_role_offloaded; + enum br_mrp_in_state_type in_state; + u32 in_transitions; + struct delayed_work test_work; u32 test_interval; unsigned long test_end; @@ -28,6 +35,12 @@ struct br_mrp { u32 test_max_miss; bool test_monitor; + struct delayed_work in_test_work; + u32 in_test_interval; + unsigned long in_test_end; + u32 in_test_count_miss; + u32 in_test_max_miss; + u32 seq_id; struct rcu_head rcu; -- cgit From 4cc625c63a9274f28f05d5be39a2cbeb48cbfed5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:51 +0200 Subject: bridge: mrp: Rename br_mrp_port_open to br_mrp_ring_port_open This patch renames the function br_mrp_port_open to br_mrp_ring_port_open. In this way is more clear that a ring port lost the continuity because there will be also a br_mrp_in_port_open. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp.c | 6 +++--- net/bridge/br_mrp_netlink.c | 2 +- net/bridge/br_private_mrp.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index 90592af9db61..fe7cf1446b58 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -213,7 +213,7 @@ static void br_mrp_test_work_expired(struct work_struct *work) } if (notify_open && !mrp->ring_role_offloaded) - br_mrp_port_open(p->dev, true); + br_mrp_ring_port_open(p->dev, true); } p = rcu_dereference(mrp->s_port); @@ -229,7 +229,7 @@ static void br_mrp_test_work_expired(struct work_struct *work) } if (notify_open && !mrp->ring_role_offloaded) - br_mrp_port_open(p->dev, true); + br_mrp_ring_port_open(p->dev, true); } out: @@ -537,7 +537,7 @@ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port, * not closed */ if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED) - br_mrp_port_open(port->dev, false); + br_mrp_ring_port_open(port->dev, false); } /* Determin if the test hdr has a better priority than the node */ diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index c4f5c356811f..acce300c0cc2 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -368,7 +368,7 @@ nla_info_failure: return -EMSGSIZE; } -int br_mrp_port_open(struct net_device *dev, u8 loc) +int br_mrp_ring_port_open(struct net_device *dev, u8 loc) { struct net_bridge_port *p; int err = 0; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 8841ba847fb2..e93c8f9d4df5 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -74,6 +74,6 @@ int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, enum br_mrp_port_role_type role); /* br_mrp_netlink.c */ -int br_mrp_port_open(struct net_device *dev, u8 loc); +int br_mrp_ring_port_open(struct net_device *dev, u8 loc); #endif /* _BR_PRIVATE_MRP_H */ -- cgit From 4139d4b51a462135d6368e80a9314b57e57279f2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:52 +0200 Subject: bridge: mrp: Add br_mrp_in_port_open function This function notifies the userspace when the node lost the continuity of MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp_netlink.c | 22 ++++++++++++++++++++++ net/bridge/br_private_mrp.h | 1 + 2 files changed, 23 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index acce300c0cc2..4bf7aaeb2915 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -389,3 +389,25 @@ int br_mrp_ring_port_open(struct net_device *dev, u8 loc) out: return err; } + +int br_mrp_in_port_open(struct net_device *dev, u8 loc) +{ + struct net_bridge_port *p; + int err = 0; + + p = br_port_get_rcu(dev); + if (!p) { + err = -EINVAL; + goto out; + } + + if (loc) + p->flags |= BR_MRP_LOST_IN_CONT; + else + p->flags &= ~BR_MRP_LOST_IN_CONT; + + br_ifinfo_notify(RTM_NEWLINK, NULL, p); + +out: + return err; +} diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index e93c8f9d4df5..23da2f956ad0 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -75,5 +75,6 @@ int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, /* br_mrp_netlink.c */ int br_mrp_ring_port_open(struct net_device *dev, u8 loc); +int br_mrp_in_port_open(struct net_device *dev, u8 loc); #endif /* _BR_PRIVATE_MRP_H */ -- cgit From f23f0db3607582636b475eaeb74d32e924f11c41 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:53 +0200 Subject: bridge: switchdev: mrp: Extend MRP API for switchdev for MRP Interconnect Implement the MRP API for interconnect switchdev. Similar with the other br_mrp_switchdev function, these function will just eventually call the switchdev functions: switchdev_port_obj_add/del. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp_switchdev.c | 62 +++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_private_mrp.h | 7 +++++ 2 files changed, 69 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c index 0da68a0da4b5..ed547e03ace1 100644 --- a/net/bridge/br_mrp_switchdev.c +++ b/net/bridge/br_mrp_switchdev.c @@ -107,6 +107,68 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge *br, return 0; } +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, + u16 in_id, u32 ring_id, + enum br_mrp_in_role_type role) +{ + struct switchdev_obj_in_role_mrp mrp_role = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_ROLE_MRP, + .in_role = role, + .in_id = mrp->in_id, + .ring_id = mrp->ring_id, + .i_port = rtnl_dereference(mrp->i_port)->dev, + }; + int err; + + if (role == BR_MRP_IN_ROLE_DISABLED) + err = switchdev_port_obj_del(br->dev, &mrp_role.obj); + else + err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL); + + return err; +} + +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_in_state_type state) +{ + struct switchdev_obj_in_state_mrp mrp_state = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_STATE_MRP, + .in_state = state, + .in_id = mrp->in_id, + }; + int err; + + err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL); + + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period) +{ + struct switchdev_obj_in_test_mrp test = { + .obj.orig_dev = br->dev, + .obj.id = SWITCHDEV_OBJ_ID_IN_TEST_MRP, + .interval = interval, + .max_miss = max_miss, + .in_id = mrp->in_id, + .period = period, + }; + int err; + + if (interval == 0) + err = switchdev_port_obj_del(br->dev, &test.obj); + else + err = switchdev_port_obj_add(br->dev, &test.obj, NULL); + + return err; +} + int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, enum br_mrp_port_state_type state) { diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 23da2f956ad0..0d554ef88db8 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -72,6 +72,13 @@ int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, enum br_mrp_port_state_type state); int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, enum br_mrp_port_role_type role); +int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, + u16 in_id, u32 ring_id, + enum br_mrp_in_role_type role); +int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp, + enum br_mrp_in_state_type state); +int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period); /* br_mrp_netlink.c */ int br_mrp_ring_port_open(struct net_device *dev, u8 loc); -- cgit From 537ed5676d4648abc8ef75b5c04d773d961aee2f Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:54 +0200 Subject: bridge: mrp: Implement the MRP Interconnect API Thie patch adds support for MRP Interconnect. Similar with the MRP ring, if the HW can't generate MRP_InTest frames, then the SW will try to generate them. And if also the SW fails to generate the frames then an error is return to userspace. The forwarding/termination of MRP_In frames is happening in the kernel and is done by MRP instances. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp.c | 570 +++++++++++++++++++++++++++++++++++++++++--- net/bridge/br_private_mrp.h | 4 + 2 files changed, 543 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index fe7cf1446b58..b36689e6e7cb 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -4,6 +4,27 @@ #include "br_private_mrp.h" static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 }; +static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x3 }; + +static bool br_mrp_is_ring_port(struct net_bridge_port *p_port, + struct net_bridge_port *s_port, + struct net_bridge_port *port) +{ + if (port == p_port || + port == s_port) + return true; + + return false; +} + +static bool br_mrp_is_in_port(struct net_bridge_port *i_port, + struct net_bridge_port *port) +{ + if (port == i_port) + return true; + + return false; +} static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br, u32 ifindex) @@ -37,6 +58,22 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) return res; } +static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id) +{ + struct br_mrp *res = NULL; + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + if (mrp->in_id == in_id) { + res = mrp; + break; + } + } + + return res; +} + static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) { struct br_mrp *mrp; @@ -52,6 +89,10 @@ static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) p = rtnl_dereference(mrp->s_port); if (p && p->dev->ifindex == ifindex) return false; + + p = rtnl_dereference(mrp->i_port); + if (p && p->dev->ifindex == ifindex) + return false; } return true; @@ -66,7 +107,8 @@ static struct br_mrp *br_mrp_find_port(struct net_bridge *br, list_for_each_entry_rcu(mrp, &br->mrp_list, list, lockdep_rtnl_is_held()) { if (rcu_access_pointer(mrp->p_port) == p || - rcu_access_pointer(mrp->s_port) == p) { + rcu_access_pointer(mrp->s_port) == p || + rcu_access_pointer(mrp->i_port) == p) { res = mrp; break; } @@ -160,6 +202,36 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp, return skb; } +static struct sk_buff *br_mrp_alloc_in_test_skb(struct br_mrp *mrp, + struct net_bridge_port *p, + enum br_mrp_port_role_type port_role) +{ + struct br_mrp_in_test_hdr *hdr = NULL; + struct sk_buff *skb = NULL; + + if (!p) + return NULL; + + skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_in_test_dmac); + if (!skb) + return NULL; + + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_IN_TEST, sizeof(*hdr)); + hdr = skb_put(skb, sizeof(*hdr)); + + hdr->id = cpu_to_be16(mrp->in_id); + ether_addr_copy(hdr->sa, p->br->dev->dev_addr); + hdr->port_role = cpu_to_be16(port_role); + hdr->state = cpu_to_be16(mrp->in_state); + hdr->transitions = cpu_to_be16(mrp->in_transitions); + hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies)); + + br_mrp_skb_common(skb, mrp); + br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0); + + return skb; +} + /* This function is continuously called in the following cases: * - when node role is MRM, in this case test_monitor is always set to false * because it needs to notify the userspace that the ring is open and needs to @@ -239,6 +311,83 @@ out: usecs_to_jiffies(mrp->test_interval)); } +/* This function is continuously called when the node has the interconnect role + * MIM. It would generate interconnect test frames and will send them on all 3 + * ports. But will also check if it stop receiving interconnect test frames. + */ +static void br_mrp_in_test_work_expired(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct br_mrp *mrp = container_of(del_work, struct br_mrp, in_test_work); + struct net_bridge_port *p; + bool notify_open = false; + struct sk_buff *skb; + + if (time_before_eq(mrp->in_test_end, jiffies)) + return; + + if (mrp->in_test_count_miss < mrp->in_test_max_miss) { + mrp->in_test_count_miss++; + } else { + /* Notify that the interconnect ring is open only if the + * interconnect ring state is closed, otherwise it would + * continue to notify at every interval. + */ + if (mrp->in_state == BR_MRP_IN_STATE_CLOSED) + notify_open = true; + } + + rcu_read_lock(); + + p = rcu_dereference(mrp->p_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_PRIMARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + + p = rcu_dereference(mrp->s_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_SECONDARY); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + + p = rcu_dereference(mrp->i_port); + if (p) { + skb = br_mrp_alloc_in_test_skb(mrp, p, + BR_MRP_PORT_ROLE_INTER); + if (!skb) + goto out; + + skb_reset_network_header(skb); + dev_queue_xmit(skb); + + if (notify_open && !mrp->in_role_offloaded) + br_mrp_in_port_open(p->dev, true); + } + +out: + rcu_read_unlock(); + + queue_delayed_work(system_wq, &mrp->in_test_work, + usecs_to_jiffies(mrp->in_test_interval)); +} + /* Deletes the MRP instance. * note: called under rtnl_lock */ @@ -251,6 +400,10 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) cancel_delayed_work_sync(&mrp->test_work); br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0, 0); + /* Stop sending MRP_InTest frames if has an interconnect role */ + cancel_delayed_work_sync(&mrp->in_test_work); + br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); + br_mrp_switchdev_del(br, mrp); /* Reset the ports */ @@ -278,6 +431,18 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) rcu_assign_pointer(mrp->s_port, NULL); } + p = rtnl_dereference(mrp->i_port); + if (p) { + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->i_port, NULL); + } + list_del_rcu(&mrp->list); kfree_rcu(mrp, rcu); } @@ -329,6 +494,7 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) rcu_assign_pointer(mrp->s_port, p); INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired); + INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired); list_add_tail_rcu(&mrp->list, &br->mrp_list); err = br_mrp_switchdev_add(br, mrp); @@ -511,6 +677,180 @@ int br_mrp_start_test(struct net_bridge *br, return 0; } +/* Set in state, int state can be only Open or Closed + * note: already called with rtnl_lock + */ +int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state) +{ + struct br_mrp *mrp = br_mrp_find_in_id(br, state->in_id); + + if (!mrp) + return -EINVAL; + + if (mrp->in_state == BR_MRP_IN_STATE_CLOSED && + state->in_state != BR_MRP_IN_STATE_CLOSED) + mrp->in_transitions++; + + mrp->in_state = state->in_state; + + br_mrp_switchdev_set_in_state(br, mrp, state->in_state); + + return 0; +} + +/* Set in role, in role can be only MIM(Media Interconnection Manager) or + * MIC(Media Interconnection Client). + * note: already called with rtnl_lock + */ +int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role) +{ + struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id); + struct net_bridge_port *p; + int err; + + if (!mrp) + return -EINVAL; + + if (!br_mrp_get_port(br, role->i_ifindex)) + return -EINVAL; + + if (role->in_role == BR_MRP_IN_ROLE_DISABLED) { + u8 state; + + /* It is not allowed to disable a port that doesn't exist */ + p = rtnl_dereference(mrp->i_port); + if (!p) + return -EINVAL; + + /* Stop the generating MRP_InTest frames */ + cancel_delayed_work_sync(&mrp->in_test_work); + br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); + + /* Remove the port */ + spin_lock_bh(&br->lock); + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; + p->flags &= ~BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + br_mrp_port_switchdev_set_state(p, state); + rcu_assign_pointer(mrp->i_port, NULL); + + mrp->in_role = role->in_role; + mrp->in_id = 0; + + return 0; + } + + /* It is not possible to have the same port part of multiple rings */ + if (!br_mrp_unique_ifindex(br, role->i_ifindex)) + return -EINVAL; + + /* It is not allowed to set a different interconnect port if the mrp + * instance has already one. First it needs to be disabled and after + * that set the new port + */ + if (rcu_access_pointer(mrp->i_port)) + return -EINVAL; + + p = br_mrp_get_port(br, role->i_ifindex); + spin_lock_bh(&br->lock); + p->state = BR_STATE_FORWARDING; + p->flags |= BR_MRP_AWARE; + spin_unlock_bh(&br->lock); + rcu_assign_pointer(mrp->i_port, p); + + mrp->in_role = role->in_role; + mrp->in_id = role->in_id; + + /* If there is an error just bailed out */ + err = br_mrp_switchdev_set_in_role(br, mrp, role->in_id, + role->ring_id, role->in_role); + if (err && err != -EOPNOTSUPP) + return err; + + /* Now detect if the HW actually applied the role or not. If the HW + * applied the role it means that the SW will not to do those operations + * anymore. For example if the role is MIM then the HW will notify the + * SW when interconnect ring is open, but if the is not pushed to the HW + * the SW will need to detect when the interconnect ring is open. + */ + mrp->in_role_offloaded = err == -EOPNOTSUPP ? 0 : 1; + + return 0; +} + +/* Start to generate MRP_InTest frames, the frames are generated by + * HW and if it fails, they are generated by the SW. + * note: already called with rtnl_lock + */ +int br_mrp_start_in_test(struct net_bridge *br, + struct br_mrp_start_in_test *in_test) +{ + struct br_mrp *mrp = br_mrp_find_in_id(br, in_test->in_id); + + if (!mrp) + return -EINVAL; + + if (mrp->in_role != BR_MRP_IN_ROLE_MIM) + return -EINVAL; + + /* Try to push it to the HW and if it fails then continue with SW + * implementation and if that also fails then return error. + */ + if (!br_mrp_switchdev_send_in_test(br, mrp, in_test->interval, + in_test->max_miss, in_test->period)) + return 0; + + mrp->in_test_interval = in_test->interval; + mrp->in_test_end = jiffies + usecs_to_jiffies(in_test->period); + mrp->in_test_max_miss = in_test->max_miss; + mrp->in_test_count_miss = 0; + queue_delayed_work(system_wq, &mrp->in_test_work, + usecs_to_jiffies(in_test->interval)); + + return 0; +} + +/* Determin if the frame type is a ring frame */ +static bool br_mrp_ring_frame(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + if (hdr->type == BR_MRP_TLV_HEADER_RING_TEST || + hdr->type == BR_MRP_TLV_HEADER_RING_TOPO || + hdr->type == BR_MRP_TLV_HEADER_RING_LINK_DOWN || + hdr->type == BR_MRP_TLV_HEADER_RING_LINK_UP || + hdr->type == BR_MRP_TLV_HEADER_OPTION) + return true; + + return false; +} + +/* Determin if the frame type is an interconnect frame */ +static bool br_mrp_in_frame(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST || + hdr->type == BR_MRP_TLV_HEADER_IN_TOPO || + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN || + hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP) + return true; + + return false; +} + /* Process only MRP Test frame. All the other MRP frames are processed by * userspace application * note: already called with rcu_read_lock @@ -591,17 +931,92 @@ static void br_mrp_mra_process(struct br_mrp *mrp, struct net_bridge *br, mrp->test_count_miss = 0; } -/* This will just forward the frame to the other mrp ring port(MRC role) or will - * not do anything. +/* Process only MRP InTest frame. All the other MRP frames are processed by + * userspace application + * note: already called with rcu_read_lock + */ +static bool br_mrp_mim_process(struct br_mrp *mrp, struct net_bridge_port *port, + struct sk_buff *skb) +{ + const struct br_mrp_in_test_hdr *in_hdr; + struct br_mrp_in_test_hdr _in_hdr; + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. + */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return false; + + /* The check for InTest frame type was already done */ + in_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr), + sizeof(_in_hdr), &_in_hdr); + if (!in_hdr) + return false; + + /* It needs to process only it's own InTest frames. */ + if (mrp->in_id != ntohs(in_hdr->id)) + return false; + + mrp->in_test_count_miss = 0; + + /* Notify the userspace that the ring is closed only when the ring is + * not closed + */ + if (mrp->in_state != BR_MRP_IN_STATE_CLOSED) + br_mrp_in_port_open(port->dev, false); + + return true; +} + +/* Get the MRP frame type + * note: already called with rcu_read_lock + */ +static u8 br_mrp_get_frame_type(struct sk_buff *skb) +{ + const struct br_mrp_tlv_hdr *hdr; + struct br_mrp_tlv_hdr _hdr; + + /* Each MRP header starts with a version field which is 16 bits. + * Therefore skip the version and get directly the TLV header. + */ + hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); + if (!hdr) + return 0xff; + + return hdr->type; +} + +static bool br_mrp_mrm_behaviour(struct br_mrp *mrp) +{ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRM || + (mrp->ring_role == BR_MRP_RING_ROLE_MRA && !mrp->test_monitor)) + return true; + + return false; +} + +static bool br_mrp_mrc_behaviour(struct br_mrp *mrp) +{ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRC || + (mrp->ring_role == BR_MRP_RING_ROLE_MRA && mrp->test_monitor)) + return true; + + return false; +} + +/* This will just forward the frame to the other mrp ring ports, depending on + * the frame type, ring role and interconnect role * note: already called with rcu_read_lock */ static int br_mrp_rcv(struct net_bridge_port *p, struct sk_buff *skb, struct net_device *dev) { - struct net_device *s_dev, *p_dev, *d_dev; - struct net_bridge_port *p_port, *s_port; + struct net_bridge_port *p_port, *s_port, *i_port = NULL; + struct net_bridge_port *p_dst, *s_dst, *i_dst = NULL; struct net_bridge *br; - struct sk_buff *nskb; struct br_mrp *mrp; /* If port is disabled don't accept any frames */ @@ -616,46 +1031,139 @@ static int br_mrp_rcv(struct net_bridge_port *p, p_port = rcu_dereference(mrp->p_port); if (!p_port) return 0; + p_dst = p_port; s_port = rcu_dereference(mrp->s_port); if (!s_port) return 0; + s_dst = s_port; - /* If the role is MRM then don't forward the frames */ - if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) { - br_mrp_mrm_process(mrp, p, skb); - return 1; - } - - /* If the role is MRA then don't forward the frames if it behaves as - * MRM node + /* If the frame is a ring frame then it is not required to check the + * interconnect role and ports to process or forward the frame */ - if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) { - if (!mrp->test_monitor) { + if (br_mrp_ring_frame(skb)) { + /* If the role is MRM then don't forward the frames */ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) { br_mrp_mrm_process(mrp, p, skb); - return 1; + goto no_forward; } - br_mrp_mra_process(mrp, br, p, skb); + /* If the role is MRA then don't forward the frames if it + * behaves as MRM node + */ + if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) { + if (!mrp->test_monitor) { + br_mrp_mrm_process(mrp, p, skb); + goto no_forward; + } + + br_mrp_mra_process(mrp, br, p, skb); + } + + goto forward; } - /* Clone the frame and forward it on the other MRP port */ - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - return 0; + if (br_mrp_in_frame(skb)) { + u8 in_type = br_mrp_get_frame_type(skb); - p_dev = p_port->dev; - s_dev = s_port->dev; + i_port = rcu_dereference(mrp->i_port); + i_dst = i_port; - if (p_dev == dev) - d_dev = s_dev; - else - d_dev = p_dev; + /* If the ring port is in block state it should not forward + * In_Test frames + */ + if (br_mrp_is_ring_port(p_port, s_port, p) && + p->state == BR_STATE_BLOCKING && + in_type == BR_MRP_TLV_HEADER_IN_TEST) + goto no_forward; + + /* Nodes that behaves as MRM needs to stop forwarding the + * frames in case the ring is closed, otherwise will be a loop. + * In this case the frame is no forward between the ring ports. + */ + if (br_mrp_mrm_behaviour(mrp) && + br_mrp_is_ring_port(p_port, s_port, p) && + (s_port->state != BR_STATE_FORWARDING || + p_port->state != BR_STATE_FORWARDING)) { + p_dst = NULL; + s_dst = NULL; + } + + /* A node that behaves as MRC and doesn't have a interconnect + * role then it should forward all frames between the ring ports + * because it doesn't have an interconnect port + */ + if (br_mrp_mrc_behaviour(mrp) && + mrp->in_role == BR_MRP_IN_ROLE_DISABLED) + goto forward; + + if (mrp->in_role == BR_MRP_IN_ROLE_MIM) { + if (in_type == BR_MRP_TLV_HEADER_IN_TEST) { + /* MIM should not forward it's own InTest + * frames + */ + if (br_mrp_mim_process(mrp, p, skb)) { + goto no_forward; + } else { + if (br_mrp_is_ring_port(p_port, s_port, + p)) + i_dst = NULL; + + if (br_mrp_is_in_port(i_port, p)) + goto no_forward; + } + } else { + /* MIM should forward IntLinkChange and + * IntTopoChange between ring ports but MIM + * should not forward IntLinkChange and + * IntTopoChange if the frame was received at + * the interconnect port + */ + if (br_mrp_is_ring_port(p_port, s_port, p)) + i_dst = NULL; + + if (br_mrp_is_in_port(i_port, p)) + goto no_forward; + } + } + + if (mrp->in_role == BR_MRP_IN_ROLE_MIC) { + /* MIC should forward InTest frames on all ports + * regardless of the received port + */ + if (in_type == BR_MRP_TLV_HEADER_IN_TEST) + goto forward; + + /* MIC should forward IntLinkChange frames only if they + * are received on ring ports to all the ports + */ + if (br_mrp_is_ring_port(p_port, s_port, p) && + (in_type == BR_MRP_TLV_HEADER_IN_LINK_UP || + in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN)) + goto forward; + + /* Should forward the InTopo frames only between the + * ring ports + */ + if (in_type == BR_MRP_TLV_HEADER_IN_TOPO) { + i_dst = NULL; + goto forward; + } + + /* In all the other cases don't forward the frames */ + goto no_forward; + } + } - nskb->dev = d_dev; - skb_push(nskb, ETH_HLEN); - dev_queue_xmit(nskb); +forward: + if (p_dst) + br_forward(p_dst, skb, true, false); + if (s_dst) + br_forward(s_dst, skb, true, false); + if (i_dst) + br_forward(i_dst, skb, true, false); +no_forward: return 1; } diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 0d554ef88db8..af0e9eff6549 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -57,6 +57,10 @@ int br_mrp_set_ring_state(struct net_bridge *br, struct br_mrp_ring_state *state); int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role); int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test); +int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state); +int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role); +int br_mrp_start_in_test(struct net_bridge *br, + struct br_mrp_start_in_test *test); /* br_mrp_switchdev.c */ int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp); -- cgit From 7ab1748e4ce607ba1b4133bfe7c2be0022339d87 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:55 +0200 Subject: bridge: mrp: Extend MRP netlink interface for configuring MRP interconnect This patch extends the existing MRP netlink interface with the following attributes: IFLA_BRIDGE_MRP_IN_ROLE, IFLA_BRIDGE_MRP_IN_STATE and IFLA_BRIDGE_MRP_START_IN_TEST. These attributes are similar with their ring attributes but they apply to the interconnect port. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp_netlink.c | 140 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 4bf7aaeb2915..a006e0771e8d 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -14,6 +14,9 @@ static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = { [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED }, [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED }, [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_IN_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_IN_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_START_IN_TEST] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -235,6 +238,121 @@ static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr, return br_mrp_start_test(br, &test); } +static const struct nla_policy +br_mrp_in_state_policy[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_IN_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_IN_STATE_IN_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_in_state_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_STATE_MAX + 1]; + struct br_mrp_in_state state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_STATE_MAX, attr, + br_mrp_in_state_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID] || + !tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: IN_ID or STATE"); + return -EINVAL; + } + + memset(&state, 0x0, sizeof(state)); + + state.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_IN_ID]); + state.in_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_STATE_STATE]); + + return br_mrp_set_in_state(br, &state); +} + +static const struct nla_policy +br_mrp_in_role_policy[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_IN_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MRP_IN_ROLE_ROLE] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] = { .type = NLA_U32 }, +}; + +static int br_mrp_in_role_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_IN_ROLE_MAX + 1]; + struct br_mrp_in_role role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_IN_ROLE_MAX, attr, + br_mrp_in_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX] || + !tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or ROLE or IN_ID or I_IFINDEX"); + return -EINVAL; + } + + memset(&role, 0x0, sizeof(role)); + + role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_RING_ID]); + role.in_id = nla_get_u16(tb[IFLA_BRIDGE_MRP_IN_ROLE_IN_ID]); + role.i_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_I_IFINDEX]); + role.in_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_IN_ROLE_ROLE]); + + return br_mrp_set_in_role(br, &role); +} + +static const struct nla_policy +br_mrp_start_in_test_policy[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1] = { + [IFLA_BRIDGE_MRP_START_IN_TEST_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD] = { .type = NLA_U32 }, +}; + +static int br_mrp_start_in_test_parse(struct net_bridge *br, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX + 1]; + struct br_mrp_start_in_test test; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_IN_TEST_MAX, attr, + br_mrp_start_in_test_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS] || + !tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); + return -EINVAL; + } + + memset(&test, 0x0, sizeof(test)); + + test.in_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_IN_ID]); + test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_INTERVAL]); + test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_MAX_MISS]); + test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_IN_TEST_PERIOD]); + + return br_mrp_start_in_test(br, &test); +} + int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { @@ -301,6 +419,28 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, return err; } + if (tb[IFLA_BRIDGE_MRP_IN_STATE]) { + err = br_mrp_in_state_parse(br, tb[IFLA_BRIDGE_MRP_IN_STATE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_IN_ROLE]) { + err = br_mrp_in_role_parse(br, tb[IFLA_BRIDGE_MRP_IN_ROLE], + extack); + if (err) + return err; + } + + if (tb[IFLA_BRIDGE_MRP_START_IN_TEST]) { + err = br_mrp_start_in_test_parse(br, + tb[IFLA_BRIDGE_MRP_START_IN_TEST], + extack); + if (err) + return err; + } + return 0; } -- cgit From 4fc4871fc2dc91098948e038ac9af7a0d1e3166a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:57 +0200 Subject: bridge: mrp: Extend br_mrp_fill_info This patch extends the function br_mrp_fill_info to return also the status for the interconnect ring. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mrp_netlink.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index a006e0771e8d..2a2fdf3500c5 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -474,6 +474,11 @@ int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) p->dev->ifindex)) goto nla_put_failure; + p = rcu_dereference(mrp->i_port); + if (p && nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_I_IFINDEX, + p->dev->ifindex)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_MRP_INFO_PRIO, mrp->prio)) goto nla_put_failure; @@ -493,6 +498,19 @@ int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br) mrp->test_monitor)) goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_STATE, + mrp->in_state)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_ROLE, + mrp->in_role)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_INTERVAL, + mrp->in_test_interval)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_BRIDGE_MRP_INFO_IN_TEST_MAX_MISS, + mrp->in_test_max_miss)) + goto nla_put_failure; + nla_nest_end(skb, tb); } nla_nest_end(skb, mrp_tb); -- cgit From ffb3adba64801f70c472303c9e386eb5eaec193d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 14 Jul 2020 09:34:58 +0200 Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_IN_OPEN This patch adds a new port attribute, IFLA_BRPORT_MRP_IN_OPEN, which allows to notify the userspace when the node lost the contiuity of MRP_InTest frames. Signed-off-by: Horatiu Vultur Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c532fa65c983..147d52596e17 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void) #endif + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */ + 0; } @@ -216,6 +217,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, !!(p->flags & BR_NEIGH_SUPPRESS)) || nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags & BR_MRP_LOST_CONT)) || + nla_put_u8(skb, IFLA_BRPORT_MRP_IN_OPEN, + !!(p->flags & BR_MRP_LOST_IN_CONT)) || nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED))) return -EMSGSIZE; -- cgit From 37adf701dd8790fd019c513b7a892d7178170338 Mon Sep 17 00:00:00 2001 From: Daniel Winkler Date: Tue, 14 Jul 2020 14:16:00 -0700 Subject: Bluetooth: Add per-instance adv disable/remove Add functionality to disable and remove advertising instances, and use that functionality in MGMT add/remove advertising calls. Currently, advertising is globally-disabled, i.e. all instances are disabled together, even if hardware offloading is available. This patch adds functionality to disable and remove individual adv instances, solving two issues: 1. On new advertisement registration, a global disable was done, and then only the new instance was enabled. This meant only the newest instance was actually enabled. 2. On advertisement removal, the structure was removed, but the instance was never disabled or removed, which is incorrect with hardware offload support. Signed-off-by: Daniel Winkler Reviewed-by: Shyh-In Hwang Reviewed-by: Alain Michaud Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_request.c | 59 ++++++++++++++++++++++++++++++++++++++------- net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 6 +++++ 4 files changed, 59 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 8805d68e65f2..be67361ff2f0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -931,7 +931,7 @@ static void hci_req_directed_advertising(struct hci_request *req, * So it is required to remove adv set for handle 0x00. since we use * instance 0 for directed adv. */ - hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(cp.handle), &cp.handle); + __hci_req_remove_ext_adv_instance(req, cp.handle); hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 770b93758112..7c0c2fda04ad 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1179,13 +1179,8 @@ static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) void __hci_req_disable_advertising(struct hci_request *req) { if (ext_adv_capable(req->hdev)) { - struct hci_cp_le_set_ext_adv_enable cp; + __hci_req_disable_ext_adv_instance(req, 0x00); - cp.enable = 0x00; - /* Disable all sets since we only support one set at the moment */ - cp.num_of_sets = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), &cp); } else { u8 enable = 0x00; @@ -1950,13 +1945,59 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance) return 0; } +int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_ext_adv_enable *cp; + struct hci_cp_ext_adv_set *adv_set; + u8 data[sizeof(*cp) + sizeof(*adv_set) * 1]; + u8 req_size; + + /* If request specifies an instance that doesn't exist, fail */ + if (instance > 0 && !hci_find_adv_instance(hdev, instance)) + return -EINVAL; + + memset(data, 0, sizeof(data)); + + cp = (void *)data; + adv_set = (void *)cp->data; + + /* Instance 0x00 indicates all advertising instances will be disabled */ + cp->num_of_sets = !!instance; + cp->enable = 0x00; + + adv_set->handle = instance; + + req_size = sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets; + hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, req_size, data); + + return 0; +} + +int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + + /* If request specifies an instance that doesn't exist, fail */ + if (instance > 0 && !hci_find_adv_instance(hdev, instance)) + return -EINVAL; + + hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance); + + return 0; +} + int __hci_req_start_ext_adv(struct hci_request *req, u8 instance) { struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance = hci_find_adv_instance(hdev, instance); int err; - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - __hci_req_disable_advertising(req); + /* If instance isn't pending, the chip knows about it, and it's safe to + * disable + */ + if (adv_instance && !adv_instance->pending) + __hci_req_disable_ext_adv_instance(req, instance); err = __hci_req_setup_ext_adv_instance(req, instance); if (err < 0) @@ -2104,7 +2145,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, hci_dev_test_flag(hdev, HCI_ADVERTISING)) return; - if (next_instance) + if (next_instance && !ext_adv_capable(hdev)) __hci_req_schedule_adv_instance(req, next_instance->instance, false); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 0e81614d235e..bbe892ab078a 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -86,6 +86,8 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance); int __hci_req_start_ext_adv(struct hci_request *req, u8 instance); int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance); +int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance); +int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance); void __hci_req_clear_ext_adv_sets(struct hci_request *req); int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bool use_rpa, struct adv_info *adv_instance, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 686ef4792831..f45105d2de77 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7504,6 +7504,12 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); + /* If we use extended advertising, instance is disabled and removed */ + if (ext_adv_capable(hdev)) { + __hci_req_disable_ext_adv_instance(&req, cp->instance); + __hci_req_remove_ext_adv_instance(&req, cp->instance); + } + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) -- cgit From 336f531ab17c3f480f8289d26c35bd48302ed085 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Jul 2020 13:56:22 +0300 Subject: netfilter: nf_tables: Fix a use after free in nft_immediate_destroy() The nf_tables_rule_release() function frees "rule" so we have to use the _safe() version of list_for_each_entry(). Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_immediate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 9e556638bb32..c63eb3b17178 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -103,9 +103,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, { const struct nft_immediate_expr *priv = nft_expr_priv(expr); const struct nft_data *data = &priv->data; + struct nft_rule *rule, *n; struct nft_ctx chain_ctx; struct nft_chain *chain; - struct nft_rule *rule; if (priv->dreg != NFT_REG_VERDICT) return; @@ -121,7 +121,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, chain_ctx = *ctx; chain_ctx.chain = chain; - list_for_each_entry(rule, &chain->rules, list) + list_for_each_entry_safe(rule, n, &chain->rules, list) nf_tables_rule_release(&chain_ctx, rule); nf_tables_chain_destroy(&chain_ctx); -- cgit From 514d09529db905251916399a90b69e1a3eac6cb1 Mon Sep 17 00:00:00 2001 From: Suraj Upadhyay Date: Tue, 14 Jul 2020 19:53:28 +0530 Subject: decnet: dn_dev: Remove an unnecessary label. Remove the unnecessary label from dn_dev_ioctl() and make its error handling simpler to read. Signed-off-by: Suraj Upadhyay Signed-off-by: Jakub Kicinski --- net/decnet/dn_dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 65abcf1b3210..15d42353f1a3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -462,7 +462,9 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) switch (cmd) { case SIOCGIFADDR: *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; - goto rarok; + if (copy_to_user(arg, ifr, DN_IFREQ_SIZE)) + ret = -EFAULT; + break; case SIOCSIFADDR: if (!ifa) { @@ -485,10 +487,6 @@ done: rtnl_unlock(); return ret; -rarok: - if (copy_to_user(arg, ifr, DN_IFREQ_SIZE)) - ret = -EFAULT; - goto done; } struct net_device *dn_dev_get_default(void) -- cgit From 9216477449f33cdbc9c9a99d49f500b7fbb81702 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Jul 2020 15:56:38 +0200 Subject: bpf: cpumap: Add the possibility to attach an eBPF program to cpumap Introduce the capability to attach an eBPF program to cpumap entries. The idea behind this feature is to add the possibility to define on which CPU run the eBPF program if the underlying hw does not support RSS. Current supported verdicts are XDP_DROP and XDP_PASS. This patch has been tested on Marvell ESPRESSObin using xdp_redirect_cpu sample available in the kernel tree to identify possible performance regressions. Results show there are no observable differences in packet-per-second: $./xdp_redirect_cpu --progname xdp_cpu_map0 --dev eth0 --cpu 1 rx: 354.8 Kpps rx: 356.0 Kpps rx: 356.8 Kpps rx: 356.3 Kpps rx: 356.6 Kpps rx: 356.6 Kpps rx: 356.7 Kpps rx: 355.8 Kpps rx: 356.8 Kpps rx: 356.8 Kpps Co-developed-by: Jesper Dangaard Brouer Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/5c9febdf903d810b3415732e5cd98491d7d9067a.1594734381.git.lorenzo@kernel.org --- net/core/dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b61075828358..b820527f0a8d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5448,6 +5448,8 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) for (i = 0; i < new->aux->used_map_cnt; i++) { if (dev_map_can_have_prog(new->aux->used_maps[i])) return -EINVAL; + if (cpu_map_prog_allowed(new->aux->used_maps[i])) + return -EINVAL; } } @@ -8875,6 +8877,13 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return -EINVAL; } + if (prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, + "BPF_XDP_CPUMAP programs can not be attached to a device"); + bpf_prog_put(prog); + return -EINVAL; + } + /* prog->aux->id may be 0 for orphaned device-bound progs */ if (prog->aux->id && prog->aux->id == prog_id) { bpf_prog_put(prog); -- cgit From 632ca50f2cbd8809a2fc1934b4b2c9c49bd55e98 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Tue, 7 Jul 2020 17:28:04 +0206 Subject: af_packet: TPACKET_V3: replace busy-wait loop A busy-wait loop is used to implement waiting for bits to be copied from the skb to the kernel buffer before retiring a block. This is a problem on PREEMPT_RT because the copying task could be preempted by the busy-waiting task and thus live lock in the busy-wait loop. Replace the busy-wait logic with an rwlock_t. This provides lockdep coverage and makes the code RT ready. Signed-off-by: John Ogness Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 20 ++++++++++---------- net/packet/internal.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7b436ebde61d..781fee93b7d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -593,6 +593,7 @@ static void init_prb_bdqc(struct packet_sock *po, req_u->req3.tp_block_size); p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + rwlock_init(&p1->blk_fill_in_prog_lock); p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); @@ -659,10 +660,9 @@ static void prb_retire_rx_blk_timer_expired(struct timer_list *t) * */ if (BLOCK_NUM_PKTS(pbd)) { - while (atomic_read(&pkc->blk_fill_in_prog)) { - /* Waiting for skb_copy_bits to finish... */ - cpu_relax(); - } + /* Waiting for skb_copy_bits to finish... */ + write_lock(&pkc->blk_fill_in_prog_lock); + write_unlock(&pkc->blk_fill_in_prog_lock); } if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { @@ -921,10 +921,9 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, * the timer-handler already handled this case. */ if (!(status & TP_STATUS_BLK_TMO)) { - while (atomic_read(&pkc->blk_fill_in_prog)) { - /* Waiting for skb_copy_bits to finish... */ - cpu_relax(); - } + /* Waiting for skb_copy_bits to finish... */ + write_lock(&pkc->blk_fill_in_prog_lock); + write_unlock(&pkc->blk_fill_in_prog_lock); } prb_close_block(pkc, pbd, po, status); return; @@ -944,7 +943,8 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - atomic_dec(&pkc->blk_fill_in_prog); + + read_unlock(&pkc->blk_fill_in_prog_lock); } static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, @@ -998,7 +998,7 @@ static void prb_fill_curr_block(char *curr, pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); BLOCK_NUM_PKTS(pbd) += 1; - atomic_inc(&pkc->blk_fill_in_prog); + read_lock(&pkc->blk_fill_in_prog_lock); prb_run_all_ft_ops(pkc, ppd); } diff --git a/net/packet/internal.h b/net/packet/internal.h index 907f4cd2a718..fd41ecb7f605 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -39,7 +39,7 @@ struct tpacket_kbdq_core { char *nxt_offset; struct sk_buff *skb; - atomic_t blk_fill_in_prog; + rwlock_t blk_fill_in_prog_lock; /* Default is set to 8ms */ #define DEFAULT_PRB_RETIRE_TOV (8) -- cgit From 55f656cdb851bae32980d83d2494201e79d93b63 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 14 Jul 2020 20:03:07 +0300 Subject: net: sched: Do not drop root lock in tcf_qevent_handle() Mirred currently does not mix well with blocks executed after the qdisc root lock is taken. This includes classification blocks (such as in PRIO, ETS, DRR qdiscs) and qevents. The locking caused by the packet mirrored by mirred can cause deadlocks: either when the thread of execution attempts to take the lock a second time, or when two threads end up waiting on each other's locks. The qevent patchset attempted to not introduce further badness of this sort, and dropped the lock before executing the qevent block. However this lead to too little locking and races between qdisc configuration and packet enqueue in the RED qdisc. Before the deadlock issues are solved in a way that can be applied across many qdiscs reasonably easily, do for qevents what is done for the classification blocks and just keep holding the root lock. Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 8 +------- net/sched/sch_red.c | 6 +++--- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 322b279154de..b2b7440c2ae7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3822,7 +3822,7 @@ int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index EXPORT_SYMBOL(tcf_qevent_validate_change); struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb, - spinlock_t *root_lock, struct sk_buff **to_free, int *ret) + struct sk_buff **to_free, int *ret) { struct tcf_result cl_res; struct tcf_proto *fl; @@ -3832,9 +3832,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru fl = rcu_dereference_bh(qe->filter_chain); - if (root_lock) - spin_unlock(root_lock); - switch (tcf_classify(skb, fl, &cl_res, false)) { case TC_ACT_SHOT: qdisc_qstats_drop(sch); @@ -3853,9 +3850,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru return NULL; } - if (root_lock) - spin_lock(root_lock); - return skb; } EXPORT_SYMBOL(tcf_qevent_handle); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index de2be4d04ed6..a79602f7fab8 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -94,7 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ if (INET_ECN_set_ce(skb)) { q->stats.prob_mark++; - skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); if (!skb) return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { @@ -114,7 +114,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ if (INET_ECN_set_ce(skb)) { q->stats.forced_mark++; - skb = tcf_qevent_handle(&q->qe_mark, sch, skb, root_lock, to_free, &ret); + skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret); if (!skb) return NET_XMIT_CN | ret; } else if (!red_use_nodrop(q)) { @@ -137,7 +137,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ return ret; congestion_drop: - skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, root_lock, to_free, &ret); + skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret); if (!skb) return NET_XMIT_CN | ret; -- cgit From ac5c66f261b7174d0d9aaeb2bf9f8c2c2dbad0bd Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 14 Jul 2020 20:03:08 +0300 Subject: Revert "net: sched: Pass root lock to Qdisc_ops.enqueue" This reverts commit aebe4426ccaa4838f36ea805cdf7d76503e65117. Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 ++-- net/sched/sch_atm.c | 4 ++-- net/sched/sch_blackhole.c | 2 +- net/sched/sch_cake.c | 2 +- net/sched/sch_cbq.c | 4 ++-- net/sched/sch_cbs.c | 18 +++++++++--------- net/sched/sch_choke.c | 2 +- net/sched/sch_codel.c | 2 +- net/sched/sch_drr.c | 4 ++-- net/sched/sch_dsmark.c | 4 ++-- net/sched/sch_etf.c | 2 +- net/sched/sch_ets.c | 4 ++-- net/sched/sch_fifo.c | 6 +++--- net/sched/sch_fq.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_generic.c | 4 ++-- net/sched/sch_gred.c | 2 +- net/sched/sch_hfsc.c | 6 +++--- net/sched/sch_hhf.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_multiq.c | 4 ++-- net/sched/sch_netem.c | 8 ++++---- net/sched/sch_pie.c | 2 +- net/sched/sch_plug.c | 2 +- net/sched/sch_prio.c | 6 +++--- net/sched/sch_qfq.c | 4 ++-- net/sched/sch_red.c | 4 ++-- net/sched/sch_sfb.c | 4 ++-- net/sched/sch_sfq.c | 2 +- net/sched/sch_skbprio.c | 2 +- net/sched/sch_taprio.c | 4 ++-- net/sched/sch_tbf.c | 10 +++++----- net/sched/sch_teql.c | 4 ++-- 34 files changed, 69 insertions(+), 69 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b61075828358..062a00fdca9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; qdisc_run(q); if (unlikely(to_free)) @@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK; + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1d5e422d9be2..1c281cc81f57 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -432,7 +432,7 @@ done: #endif } - ret = qdisc_enqueue(skb, flow->q, root_lock, to_free); + ret = qdisc_enqueue(skb, flow->q, to_free); if (ret != NET_XMIT_SUCCESS) { drop: __maybe_unused if (net_xmit_drop_count(ret)) { diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 187644657c4f..a7f7667ae984 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -13,7 +13,7 @@ #include #include -static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index b3cdcd86cbfd..561d20c9adca 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1687,7 +1687,7 @@ hash: static void cake_reconfigure(struct Qdisc *sch); -static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cake_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9e16fdf47fe7..b2130df933a7 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) } static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, return ret; } - ret = qdisc_enqueue(skb, cl->q, root_lock, to_free); + ret = qdisc_enqueue(skb, cl->q, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; cbq_mark_toplevel(q, cl); diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 7af15ebe07f7..2eaac2ff380f 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -77,7 +77,7 @@ struct cbs_sched_data { s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; - int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; @@ -85,13 +85,13 @@ struct cbs_sched_data { }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct Qdisc *child, spinlock_t *root_lock, + struct Qdisc *child, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); int err; - err = child->ops->enqueue(skb, child, root_lock, to_free); + err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; @@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; - return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); + return cbs_child_enqueue(skb, sch, qdisc, to_free); } -static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); @@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t * q->last = ktime_get_ns(); } - return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free); + return cbs_child_enqueue(skb, sch, qdisc, to_free); } -static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); - return q->enqueue(skb, sch, root_lock, to_free); + return q->enqueue(skb, sch, to_free); } /* timediff is in ns, slope is in bytes/s */ diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index baf3faee31aa..bd618b00d319 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q, return choke_match_flow(oskb, nskb); } -static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct choke_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 1d94837abdd8..30169b3adbbb 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) return skb; } -static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct codel_sched_data *q; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index ad14df2ecf3a..dde564670ad8 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, return NULL; } -static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 76a9c4f277f2..2b88710994d7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, /* --------------------------- Qdisc operations ---------------------------- */ -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *ro } } - err = qdisc_enqueue(skb, p->q, root_lock, to_free); + err = qdisc_enqueue(skb, p->q, to_free); if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 7a7c50a68115..c48f91075b5c 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) } static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, - spinlock_t *root_lock, struct sk_buff **to_free) + struct sk_buff **to_free) { struct etf_sched_data *q = qdisc_priv(sch); struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 1af86f30b18e..c1e84d1eeaba 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch, return &q->classes[band]; } -static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); @@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b4da5b624ad8..a579a4131d22 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -16,7 +16,7 @@ /* 1 band FIFO pseudo-"scheduler" */ -static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) @@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *roo return qdisc_drop(skb, sch, to_free); } -static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { if (likely(sch->q.qlen < sch->limit)) @@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *roo return qdisc_drop(skb, sch, to_free); } -static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int prev_backlog; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a90d745c41e0..2fb76fc0cc31 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); } -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct fq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index bca016ffc069..3106653c17f3 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, return idx; } -static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct fq_codel_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 741840b9994f..f98c74018805 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, skb->next = NULL; } -static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct fq_pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 715cde1df9e4..265a61d011df 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off); cheaper. */ -static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, +static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { __qdisc_drop(skb, to_free); @@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, return &priv->q[band]; } -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock, +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { int band = prio2band[skb->priority & TC_PRIO_MAX]; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 7d67c6cd6605..8599c6f31b05 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table) return false; } -static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct gred_sched_data *q = NULL; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 13ba8648bb63..0f5f121404f3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) return -1; } -static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, - struct sk_buff **to_free) +static int +hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; @@ -1545,7 +1545,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root } first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index ddc6bf1d85d0..420ede875322 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -368,7 +368,7 @@ static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) return bucket - q->buckets; } -static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct hhf_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index b07f29059f47..ba37defaca7a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -576,7 +576,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) cl->prio_activity = 0; } -static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { int uninitialized_var(ret); @@ -599,7 +599,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, root_lock, + } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { qdisc_qstats_drop(sch); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 56bdc4bcdc63..5c27b4270b90 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -57,7 +57,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) } static int -multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct Qdisc *qdisc; @@ -74,7 +74,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, } #endif - ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); + ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 8fb17483a34f..84f82771cdf5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -431,7 +431,7 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, * NET_XMIT_DROP: queue length didn't change. * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct netem_sched_data *q = qdisc_priv(sch); @@ -480,7 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *roo u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; - rootq->enqueue(skb2, rootq, root_lock, to_free); + rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; rc_drop = NET_XMIT_SUCCESS; } @@ -604,7 +604,7 @@ finish_segs: skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; last_len = segs->len; - rc = qdisc_enqueue(segs, sch, root_lock, to_free); + rc = qdisc_enqueue(segs, sch, to_free); if (rc != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(rc)) qdisc_qstats_drop(sch); @@ -720,7 +720,7 @@ deliver: struct sk_buff *to_free = NULL; int err; - err = qdisc_enqueue(skb, q->qdisc, NULL, &to_free); + err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); if (err != NET_XMIT_SUCCESS && net_xmit_drop_count(err)) { diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index b305313b64e3..c65077f0c0f3 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -82,7 +82,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, } EXPORT_SYMBOL_GPL(pie_drop_early); -static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct pie_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index e5f8b4769b4d..cbc2ebca4548 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -84,7 +84,7 @@ struct plug_sched_data { u32 pkts_to_release; }; -static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct plug_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 46b7ce81c6e3..3eabb871a1d5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -65,8 +65,8 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return q->queues[band]; } -static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, - struct sk_buff **to_free) +static int +prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; @@ -83,7 +83,7 @@ static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root } #endif - ret = qdisc_enqueue(skb, qdisc, root_lock, to_free); + ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { sch->qstats.backlog += len; sch->q.qlen++; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 3cfe6262eb00..6335230a971e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1194,7 +1194,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; @@ -1225,7 +1225,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; first = !cl->qdisc->q.qlen; - err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a79602f7fab8..4cc0ad0b1189 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -67,7 +67,7 @@ static int red_use_nodrop(struct red_sched_data *q) return q->flags & TC_RED_NODROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct red_sched_data *q = qdisc_priv(sch); @@ -126,7 +126,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ break; } - ret = qdisc_enqueue(skb, child, root_lock, to_free); + ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 356f6d1d30db..da047a37a3bf 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -276,7 +276,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, return false; } -static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -399,7 +399,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ } enqueue: - ret = qdisc_enqueue(skb, child, root_lock, to_free); + ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a1314510dc69..cae5dbbadc1c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -343,7 +343,7 @@ static int sfq_headdrop(const struct sfq_sched_data *q) } static int -sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, struct sk_buff **to_free) +sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); unsigned int hash, dropped; diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index f75f237c4436..7a5e4c454715 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -65,7 +65,7 @@ static u16 calc_new_low_prio(const struct skbprio_sched_data *q) return SKBPRIO_MAX_PRIORITY - 1; } -static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index daef2ff60a98..e981992634dd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -410,7 +410,7 @@ done: return txtime; } -static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); @@ -435,7 +435,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *ro qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; - return qdisc_enqueue(skb, child, root_lock, to_free); + return qdisc_enqueue(skb, child, to_free); } static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c3eb5cdb83a8..78e79029dc63 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -187,7 +187,7 @@ static int tbf_offload_dump(struct Qdisc *sch) /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ -static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -206,7 +206,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ skb_mark_not_on_list(segs); qdisc_skb_cb(segs)->pkt_len = segs->len; len += segs->len; - ret = qdisc_enqueue(segs, q->qdisc, root_lock, to_free); + ret = qdisc_enqueue(segs, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); @@ -221,7 +221,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } -static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, +static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -231,10 +231,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_ if (qdisc_pkt_len(skb) > q->max_size) { if (skb_is_gso(skb) && skb_gso_validate_mac_len(skb, q->max_size)) - return tbf_segment(skb, sch, root_lock, to_free); + return tbf_segment(skb, sch, to_free); return qdisc_drop(skb, sch, to_free); } - ret = qdisc_enqueue(skb, q->qdisc, root_lock, to_free); + ret = qdisc_enqueue(skb, q->qdisc, to_free); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index b586eec2eaeb..2f1f0a378408 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -72,8 +72,8 @@ struct teql_sched_data { /* "teql*" qdisc routines */ -static int teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock, - struct sk_buff **to_free) +static int +teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); -- cgit From 0a0d93b943a20e4210491911e82c3b4a34391ce4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Jul 2020 15:02:30 +0800 Subject: xfrm: interface: use IS_REACHABLE to avoid some compile errors kernel test robot reported some compile errors: ia64-linux-ld: net/xfrm/xfrm_interface.o: in function `xfrmi4_fini': net/xfrm/xfrm_interface.c:900: undefined reference to `xfrm4_tunnel_deregister' ia64-linux-ld: net/xfrm/xfrm_interface.c:901: undefined reference to `xfrm4_tunnel_deregister' ia64-linux-ld: net/xfrm/xfrm_interface.o: in function `xfrmi4_init': net/xfrm/xfrm_interface.c:873: undefined reference to `xfrm4_tunnel_register' ia64-linux-ld: net/xfrm/xfrm_interface.c:876: undefined reference to `xfrm4_tunnel_register' ia64-linux-ld: net/xfrm/xfrm_interface.c:885: undefined reference to `xfrm4_tunnel_deregister' This happened when set CONFIG_XFRM_INTERFACE=y and CONFIG_INET_TUNNEL=m. We don't really want xfrm_interface to depend inet_tunnel completely, but only to disable the tunnel code when inet_tunnel is not seen. So instead of adding "select INET_TUNNEL" for XFRM_INTERFACE, this patch is only to change to IS_REACHABLE to avoid these compile error. Reported-by: kernel test robot Fixes: da9bbf0598c9 ("xfrm: interface: support IPIP and IPIP6 tunnels processing with .cb_handler") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 63a52b4b6ea9..4c904d332007 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -812,7 +812,7 @@ static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .priority = 10, }; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int xfrmi6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; @@ -863,7 +863,7 @@ static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { .priority = 10, }; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) static int xfrmi4_rcv_tunnel(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); @@ -897,7 +897,7 @@ static int __init xfrmi4_init(void) err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; @@ -908,7 +908,7 @@ static int __init xfrmi4_init(void) return 0; -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm_tunnel_ipip6_failed: xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); xfrm_tunnel_ipip_failed: @@ -924,7 +924,7 @@ xfrm_proto_esp_failed: static void xfrmi4_fini(void) { -#if IS_ENABLED(CONFIG_INET_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6); xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); #endif @@ -946,7 +946,7 @@ static int __init xfrmi6_init(void) err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipv6_failed; @@ -957,7 +957,7 @@ static int __init xfrmi6_init(void) return 0; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm_tunnel_ip6ip_failed: xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); xfrm_tunnel_ipv6_failed: @@ -973,7 +973,7 @@ xfrm_proto_esp_failed: static void xfrmi6_fini(void) { -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); #endif -- cgit From 96a208295040c00c85d022720c10ec1a751b9bdb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Jul 2020 15:03:14 +0800 Subject: ip6_vti: use IS_REACHABLE to avoid some compile errors Naresh reported some compile errors: arm build failed due this error on linux-next 20200713 and 20200713 net/ipv6/ip6_vti.o: In function `vti6_rcv_tunnel': ip6_vti.c:(.text+0x1d20): undefined reference to `xfrm6_tunnel_spi_lookup' This happened when set CONFIG_IPV6_VTI=y and CONFIG_INET6_TUNNEL=m. We don't really want ip6_vti to depend inet6_tunnel completely, but only to disable the tunnel code when inet6_tunnel is not seen. So instead of adding "select INET6_TUNNEL" for IPV6_VTI, this patch is only to change to IS_REACHABLE to avoid these compile error. Reported-by: Naresh Kamboju Fixes: 08622869ed3f ("ip6_vti: support IP6IP6 tunnel processing with .cb_handler") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 18ec4ab45be7..53f12b40528e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1218,7 +1218,7 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int vti6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; @@ -1270,7 +1270,7 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) msg = "ipv6 tunnel"; err = xfrm6_tunnel_register(&vti_ipv6_handler, AF_INET6); if (err < 0) @@ -1288,7 +1288,7 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); vti_tunnel_ip6ip_failed: err = xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); @@ -1312,7 +1312,7 @@ pernet_dev_failed: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); -#if IS_ENABLED(CONFIG_INET6_XFRM_TUNNEL) +#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&vti_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&vti_ipv6_handler, AF_INET6); #endif -- cgit From eac87c413bf9794c14d488998a5265ea5b32f04e Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Wed, 15 Jul 2020 14:09:28 +0200 Subject: net: openvswitch: reorder masks array based on usage This patch reorders the masks array every 4 seconds based on their usage count. This greatly reduces the masks per packet hit, and hence the overall performance. Especially in the OVS/OVN case for OpenShift. Here are some results from the OVS/OVN OpenShift test, which use 8 pods, each pod having 512 uperf connections, each connection sends a 64-byte request and gets a 1024-byte response (TCP). All uperf clients are on 1 worker node while all uperf servers are on the other worker node. Kernel without this patch : 7.71 Gbps Kernel with this patch applied: 14.52 Gbps We also run some tests to verify the rebalance activity does not lower the flow insertion rate, which does not. Signed-off-by: Eelco Chaudron Tested-by: Andrew Theurer Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 22 ++++++ net/openvswitch/datapath.h | 8 +- net/openvswitch/flow_table.c | 173 +++++++++++++++++++++++++++++++++++++++++-- net/openvswitch/flow_table.h | 11 +++ 4 files changed, 207 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 94b024534987..95805f0e27bd 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -130,6 +130,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *, uint32_t cutlen); +static void ovs_dp_masks_rebalance(struct work_struct *work); + /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -1653,6 +1655,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); + INIT_DELAYED_WORK(&dp->masks_rebalance, ovs_dp_masks_rebalance); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); @@ -1712,6 +1715,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); + schedule_delayed_work(&dp->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); + ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); @@ -1756,6 +1762,9 @@ static void __dp_destroy(struct datapath *dp) /* RCU destroy the flow table */ call_rcu(&dp->rcu, destroy_dp_rcu); + + /* Cancel remaining work. */ + cancel_delayed_work_sync(&dp->masks_rebalance); } static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -2338,6 +2347,19 @@ out: return skb->len; } +static void ovs_dp_masks_rebalance(struct work_struct *work) +{ + struct datapath *dp = container_of(work, struct datapath, + masks_rebalance.work); + + ovs_lock(); + ovs_flow_masks_rebalance(&dp->table); + ovs_unlock(); + + schedule_delayed_work(&dp->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); +} + static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 2016dd107939..697a2354194b 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -20,8 +20,9 @@ #include "meter.h" #include "vport-internal_dev.h" -#define DP_MAX_PORTS USHRT_MAX -#define DP_VPORT_HASH_BUCKETS 1024 +#define DP_MAX_PORTS USHRT_MAX +#define DP_VPORT_HASH_BUCKETS 1024 +#define DP_MASKS_REBALANCE_INTERVAL 4000 /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given @@ -83,6 +84,9 @@ struct datapath { /* Switch meters. */ struct dp_meter_table meter_tbl; + + /* re-balance flow masks timer */ + struct delayed_work masks_rebalance; }; /** diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 2398d7238300..af22c9ee28dd 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -169,16 +170,70 @@ static struct table_instance *table_instance_alloc(int new_size) return ti; } +static void __mask_array_destroy(struct mask_array *ma) +{ + free_percpu(ma->masks_usage_cntr); + kfree(ma); +} + +static void mask_array_rcu_cb(struct rcu_head *rcu) +{ + struct mask_array *ma = container_of(rcu, struct mask_array, rcu); + + __mask_array_destroy(ma); +} + +static void tbl_mask_array_reset_counters(struct mask_array *ma) +{ + int i, cpu; + + /* As the per CPU counters are not atomic we can not go ahead and + * reset them from another CPU. To be able to still have an approximate + * zero based counter we store the value at reset, and subtract it + * later when processing. + */ + for (i = 0; i < ma->max; i++) { + ma->masks_usage_zero_cntr[i] = 0; + + for_each_possible_cpu(cpu) { + u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, + cpu); + unsigned int start; + u64 counter; + + do { + start = u64_stats_fetch_begin_irq(&ma->syncp); + counter = usage_counters[i]; + } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + + ma->masks_usage_zero_cntr[i] += counter; + } + } +} + static struct mask_array *tbl_mask_array_alloc(int size) { struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); new = kzalloc(sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * size, GFP_KERNEL); + sizeof(struct sw_flow_mask *) * size + + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; + new->masks_usage_zero_cntr = (u64 *)((u8 *)new + + sizeof(struct mask_array) + + sizeof(struct sw_flow_mask *) * + size); + + new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size, + __alignof__(u64)); + if (!new->masks_usage_cntr) { + kfree(new); + return NULL; + } + new->count = 0; new->max = size; @@ -202,10 +257,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) if (ovsl_dereference(old->masks[i])) new->masks[new->count++] = old->masks[i]; } + call_rcu(&old->rcu, mask_array_rcu_cb); } rcu_assign_pointer(tbl->mask_array, new); - kfree_rcu(old, rcu); return 0; } @@ -223,6 +278,11 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl, return err; ma = ovsl_dereference(tbl->mask_array); + } else { + /* On every add or delete we need to reset the counters so + * every new mask gets a fair chance of being prioritized. + */ + tbl_mask_array_reset_counters(ma); } BUG_ON(ovsl_dereference(ma->masks[ma_count])); @@ -260,6 +320,9 @@ found: if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && ma_count <= (ma->max / 3)) tbl_mask_array_realloc(tbl, ma->max / 2); + else + tbl_mask_array_reset_counters(ma); + } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -312,7 +375,7 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); free_mask_array: - kfree(ma); + __mask_array_destroy(ma); free_mask_cache: free_percpu(table->mask_cache); return -ENOMEM; @@ -392,7 +455,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); free_percpu(table->mask_cache); - kfree_rcu(rcu_dereference_raw(table->mask_array), rcu); + call_rcu(&table->mask_array->rcu, mask_array_rcu_cb); table_instance_destroy(table, ti, ufid_ti, false); } @@ -606,6 +669,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *n_mask_hit, u32 *index) { + u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr); struct sw_flow *flow; struct sw_flow_mask *mask; int i; @@ -614,8 +678,12 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); - if (flow) + if (flow) { + u64_stats_update_begin(&ma->syncp); + usage_counters[*index]++; + u64_stats_update_end(&ma->syncp); return flow; + } } } @@ -631,6 +699,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ *index = i; + u64_stats_update_begin(&ma->syncp); + usage_counters[*index]++; + u64_stats_update_end(&ma->syncp); return flow; } } @@ -934,6 +1005,98 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, return 0; } +static int compare_mask_and_count(const void *a, const void *b) +{ + const struct mask_count *mc_a = a; + const struct mask_count *mc_b = b; + + return (s64)mc_b->counter - (s64)mc_a->counter; +} + +/* Must be called with OVS mutex held. */ +void ovs_flow_masks_rebalance(struct flow_table *table) +{ + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); + struct mask_count *masks_and_count; + struct mask_array *new; + int masks_entries = 0; + int i; + + /* Build array of all current entries with use counters. */ + masks_and_count = kmalloc_array(ma->max, sizeof(*masks_and_count), + GFP_KERNEL); + if (!masks_and_count) + return; + + for (i = 0; i < ma->max; i++) { + struct sw_flow_mask *mask; + unsigned int start; + int cpu; + + mask = rcu_dereference_ovsl(ma->masks[i]); + if (unlikely(!mask)) + break; + + masks_and_count[i].index = i; + masks_and_count[i].counter = 0; + + for_each_possible_cpu(cpu) { + u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr, + cpu); + u64 counter; + + do { + start = u64_stats_fetch_begin_irq(&ma->syncp); + counter = usage_counters[i]; + } while (u64_stats_fetch_retry_irq(&ma->syncp, start)); + + masks_and_count[i].counter += counter; + } + + /* Subtract the zero count value. */ + masks_and_count[i].counter -= ma->masks_usage_zero_cntr[i]; + + /* Rather than calling tbl_mask_array_reset_counters() + * below when no change is needed, do it inline here. + */ + ma->masks_usage_zero_cntr[i] += masks_and_count[i].counter; + } + + if (i == 0) + goto free_mask_entries; + + /* Sort the entries */ + masks_entries = i; + sort(masks_and_count, masks_entries, sizeof(*masks_and_count), + compare_mask_and_count, NULL); + + /* If the order is the same, nothing to do... */ + for (i = 0; i < masks_entries; i++) { + if (i != masks_and_count[i].index) + break; + } + if (i == masks_entries) + goto free_mask_entries; + + /* Rebuilt the new list in order of usage. */ + new = tbl_mask_array_alloc(ma->max); + if (!new) + goto free_mask_entries; + + for (i = 0; i < masks_entries; i++) { + int index = masks_and_count[i].index; + + new->masks[new->count++] = + rcu_dereference_ovsl(ma->masks[index]); + } + + rcu_assign_pointer(table->mask_array, new); + call_rcu(&ma->rcu, mask_array_rcu_cb); + +free_mask_entries: + kfree(masks_and_count); +} + /* Initializes the flow module. * Returns zero if successful or a negative error code. */ int ovs_flow_init(void) diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 8a5cea6ae111..1f664b050e3b 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -27,9 +27,17 @@ struct mask_cache_entry { u32 mask_index; }; +struct mask_count { + int index; + u64 counter; +}; + struct mask_array { struct rcu_head rcu; int count, max; + u64 __percpu *masks_usage_cntr; + u64 *masks_usage_zero_cntr; + struct u64_stats_sync syncp; struct sw_flow_mask __rcu *masks[]; }; @@ -86,4 +94,7 @@ bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); + +void ovs_flow_masks_rebalance(struct flow_table *table); + #endif /* flow_table.h */ -- cgit From 8c728940487945e25cdfe020d58da42143aa98c1 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 15 Jul 2020 22:27:05 +0200 Subject: mptcp: silence warning in subflow_data_ready() since commit d47a72152097 ("mptcp: fix race in subflow_data_ready()"), it is possible to observe a regression in MP_JOIN kselftests. For sockets in TCP_CLOSE state, it's not sufficient to just wake up the main socket: we also need to ensure that received data are made available to the reader. Silence the WARN_ON_ONCE() in these cases: it preserves the syzkaller fix and restores kselftests when they are ran as follows: # while true; do > make KBUILD_OUTPUT=/tmp/kselftest TARGETS=net/mptcp kselftest > done Reported-by: Florian Westphal Fixes: d47a72152097 ("mptcp: fix race in subflow_data_ready()") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/47 Signed-off-by: Davide Caratti Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9f7f3772c13c..519122e66f17 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -869,18 +869,19 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) static void subflow_data_ready(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); struct sock *parent = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(parent); - if ((1 << inet_sk_state_load(sk)) & (TCPF_LISTEN | TCPF_CLOSE)) { + if (state & TCPF_LISTEN) { set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; } WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join); + !subflow->mp_join && !(state & TCPF_CLOSE)); if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); -- cgit From a71d77e6be1e29ec809cc7c85d9594e7769406cd Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 16 Jul 2020 12:12:34 -0700 Subject: tcp: fix segment accounting when DSACK range covers multiple segments Currently, while processing DSACK, we assume DSACK covers only one segment. This leads to significant underestimation of DSACKs with LRO/GRO. This patch fixes segment accounting with DSACK by estimating segment count from DSACK sequence range / MSS. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Yousuk Seung Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 80 +++++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b03ca68d4111..5d6bbcb1e570 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -871,12 +871,41 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } +struct tcp_sacktag_state { + /* Timestamps for earliest and latest never-retransmitted segment + * that was SACKed. RTO needs the earliest RTT to stay conservative, + * but congestion control should still get an accurate delay signal. + */ + u64 first_sackt; + u64 last_sackt; + u32 reord; + u32 sack_delivered; + int flag; + unsigned int mss_now; + struct rate_sample *rate; +}; + /* Take a notice that peer is sending D-SACKs */ -static void tcp_dsack_seen(struct tcp_sock *tp) +static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, + u32 end_seq, struct tcp_sacktag_state *state) { + u32 seq_len, dup_segs = 1; + + if (before(start_seq, end_seq)) { + seq_len = end_seq - start_seq; + if (seq_len > tp->mss_cache) + dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + } + tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; tp->rack.dsack_seen = 1; - tp->dsack_dups++; + tp->dsack_dups += dup_segs; + + state->flag |= FLAG_DSACKING_ACK; + /* A spurious retransmission is delivered */ + state->sack_delivered += dup_segs; + + return dup_segs; } /* It's reordering when higher sequence was delivered (i.e. sacked) before @@ -1103,53 +1132,37 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, - u32 prior_snd_una) + u32 prior_snd_una, struct tcp_sacktag_state *state) { struct tcp_sock *tp = tcp_sk(sk); u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); - bool dup_sack = false; + u32 dup_segs; if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { - dup_sack = true; - tcp_dsack_seen(tp); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); - if (!after(end_seq_0, end_seq_1) && - !before(start_seq_0, start_seq_1)) { - dup_sack = true; - tcp_dsack_seen(tp); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPDSACKOFORECV); - } + if (after(end_seq_0, end_seq_1) || before(start_seq_0, start_seq_1)) + return false; + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKOFORECV); + } else { + return false; } + dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); + /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && + if (tp->undo_marker && tp->undo_retrans > 0 && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) - tp->undo_retrans--; + tp->undo_retrans = max_t(int, 0, tp->undo_retrans - dup_segs); - return dup_sack; + return true; } -struct tcp_sacktag_state { - u32 reord; - /* Timestamps for earliest and latest never-retransmitted segment - * that was SACKed. RTO needs the earliest RTT to stay conservative, - * but congestion control should still get an accurate delay signal. - */ - u64 first_sackt; - u64 last_sackt; - struct rate_sample *rate; - int flag; - unsigned int mss_now; - u32 sack_delivered; -}; - /* Check if skb is fully within the SACK block. In presence of GSO skbs, * the incoming SACK may not exactly match but we can find smaller MSS * aligned portion of it that matches. Therefore we might need to fragment @@ -1692,12 +1705,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, tcp_highest_sack_reset(sk); found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, - num_sacks, prior_snd_una); - if (found_dup_sack) { - state->flag |= FLAG_DSACKING_ACK; - /* A spurious retransmission is delivered */ - state->sack_delivered++; - } + num_sacks, prior_snd_una, state); /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can -- cgit From e3a5a1e8b6548f5d37328e2d3571edc5c9e6d7c0 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 16 Jul 2020 12:12:35 -0700 Subject: tcp: add SNMP counter for no. of duplicate segments reported by DSACK There are two existing SNMP counters, TCPDSACKRecv and TCPDSACKOfoRecv, which are incremented depending on whether the DSACKed range is below the cumulative ACK sequence number or not. Unfortunately, these both implicitly assume each DSACK covers only one segment. This makes these counters unusable for estimating spurious retransmit rates, or real/non-spurious loss rate. This patch introduces a new SNMP counter, TCPDSACKRecvSegs, which tracks the estimated number of duplicate segments based on: (DSACKed sequence range) / MSS. This counter is usable for estimating spurious retransmit rates, or real/non-spurious loss rate. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 75545a829a2b..1074df726ec0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -292,6 +292,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), + SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5d6bbcb1e570..82906deb7874 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1153,6 +1153,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs); /* D-SACK for already forgotten data... Do dumb counting. */ if (tp->undo_marker && tp->undo_retrans > 0 && -- cgit From e0c3f4c4fdc0648cffedd5e70f16be7ab45340b4 Mon Sep 17 00:00:00 2001 From: Suraj Upadhyay Date: Fri, 17 Jul 2020 00:46:45 +0530 Subject: net: decnet: af_decnet: Simplify goto loop. Replace goto loop with while loop. Signed-off-by: Suraj Upadhyay Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0a46ea3bddd5..7d7ae2dd69b8 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2134,14 +2134,11 @@ static struct sock *dn_socket_get_next(struct seq_file *seq, struct dn_iter_state *state = seq->private; n = sk_next(n); -try_again: - if (n) - goto out; - if (++state->bucket >= DN_SK_HASH_SIZE) - goto out; - n = sk_head(&dn_sk_hash[state->bucket]); - goto try_again; -out: + while (!n) { + if (++state->bucket >= DN_SK_HASH_SIZE) + break; + n = sk_head(&dn_sk_hash[state->bucket]); + } return n; } -- cgit From e9ddbb7707ff5891616240026062b8c1e29864ca Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:23 +0200 Subject: bpf: Introduce SK_LOOKUP program type with a dedicated attach point Add a new program type BPF_PROG_TYPE_SK_LOOKUP with a dedicated attach type BPF_SK_LOOKUP. The new program kind is to be invoked by the transport layer when looking up a listening socket for a new connection request for connection oriented protocols, or when looking up an unconnected socket for a packet for connection-less protocols. When called, SK_LOOKUP BPF program can select a socket that will receive the packet. This serves as a mechanism to overcome the limits of what bind() API allows to express. Two use-cases driving this work are: (1) steer packets destined to an IP range, on fixed port to a socket 192.0.2.0/24, port 80 -> NGINX socket (2) steer packets destined to an IP address, on any port to a socket 198.51.100.1, any port -> L7 proxy socket In its run-time context program receives information about the packet that triggered the socket lookup. Namely IP version, L4 protocol identifier, and address 4-tuple. Context can be further extended to include ingress interface identifier. To select a socket BPF program fetches it from a map holding socket references, like SOCKMAP or SOCKHASH, and calls bpf_sk_assign(ctx, sk, ...) helper to record the selection. Transport layer then uses the selected socket as a result of socket lookup. In its basic form, SK_LOOKUP acts as a filter and hence must return either SK_PASS or SK_DROP. If the program returns with SK_PASS, transport should look for a socket to receive the packet, or use the one selected by the program if available, while SK_DROP informs the transport layer that the lookup should fail. This patch only enables the user to attach an SK_LOOKUP program to a network namespace. Subsequent patches hook it up to run on local delivery path in ipv4 and ipv6 stacks. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-3-jakub@cloudflare.com --- net/core/filter.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index bdd2382e655d..d099436b3ff5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9229,6 +9229,186 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; + +BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, + struct sock *, sk, u64, flags) +{ + if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE | + BPF_SK_LOOKUP_F_NO_REUSEPORT))) + return -EINVAL; + if (unlikely(sk && sk_is_refcounted(sk))) + return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */ + if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED)) + return -ESOCKTNOSUPPORT; /* reject connected sockets */ + + /* Check if socket is suitable for packet L3/L4 protocol */ + if (sk && sk->sk_protocol != ctx->protocol) + return -EPROTOTYPE; + if (sk && sk->sk_family != ctx->family && + (sk->sk_family == AF_INET || ipv6_only_sock(sk))) + return -EAFNOSUPPORT; + + if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE)) + return -EEXIST; + + /* Select socket as lookup result */ + ctx->selected_sk = sk; + ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT; + return 0; +} + +static const struct bpf_func_proto bpf_sk_lookup_assign_proto = { + .func = bpf_sk_lookup_assign, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL, + .arg3_type = ARG_ANYTHING, +}; + +static const struct bpf_func_proto * +sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; + case BPF_FUNC_sk_assign: + return &bpf_sk_lookup_assign_proto; + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static bool sk_lookup_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_sk_lookup)) + return false; + if (off % size != 0) + return false; + if (type != BPF_READ) + return false; + + switch (off) { + case offsetof(struct bpf_sk_lookup, sk): + info->reg_type = PTR_TO_SOCKET_OR_NULL; + return size == sizeof(__u64); + + case bpf_ctx_range(struct bpf_sk_lookup, family): + case bpf_ctx_range(struct bpf_sk_lookup, protocol): + case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4): + case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): + case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case bpf_ctx_range(struct bpf_sk_lookup, local_port): + bpf_ctx_record_field_size(info, sizeof(__u32)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); + + default: + return false; + } +} + +static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct bpf_sk_lookup, sk): + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, selected_sk)); + break; + + case offsetof(struct bpf_sk_lookup, family): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + family, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, protocol): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + protocol, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, remote_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.saddr, 4, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_ip4): + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + v4.daddr, 4, target_size)); + break; + + case bpf_ctx_range_till(struct bpf_sk_lookup, + remote_ip6[0], remote_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.saddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + } + case bpf_ctx_range_till(struct bpf_sk_lookup, + local_ip6[0], local_ip6[3]): { +#if IS_ENABLED(CONFIG_IPV6) + int off = si->off; + + off -= offsetof(struct bpf_sk_lookup, local_ip6[0]); + off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, + offsetof(struct bpf_sk_lookup_kern, v6.daddr)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + } + case offsetof(struct bpf_sk_lookup, remote_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + sport, 2, target_size)); + break; + + case offsetof(struct bpf_sk_lookup, local_port): + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, + bpf_target_off(struct bpf_sk_lookup_kern, + dport, 2, target_size)); + break; + } + + return insn - insn_buf; +} + +const struct bpf_prog_ops sk_lookup_prog_ops = { +}; + +const struct bpf_verifier_ops sk_lookup_verifier_ops = { + .get_func_proto = sk_lookup_func_proto, + .is_valid_access = sk_lookup_is_valid_access, + .convert_ctx_access = sk_lookup_convert_ctx_access, +}; + #endif /* CONFIG_INET */ DEFINE_BPF_DISPATCHER(xdp) -- cgit From 80b373f74f9e28b0093930a6b95c929732f02512 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:24 +0200 Subject: inet: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from __inet_lookup_listener as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-4-jakub@cloudflare.com --- net/ipv4/inet_hashtables.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2bbaaf0c7176..ab64834837c8 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -246,6 +246,21 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 phash; + + if (sk->sk_reuseport) { + phash = inet_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); + } + return reuse_sk; +} + /* * Here are some nice properties to exploit here. The BSD API * does not allow a listening sock to specify the remote port nor the @@ -265,21 +280,17 @@ static struct sock *inet_lhash2_lookup(struct net *net, struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; - u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - return result; - } + result = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; hiscore = score; } -- cgit From 1559b4aa1db443096af493c7d621dc156054babe Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:25 +0200 Subject: inet: Run SK_LOOKUP BPF program on socket lookup Run a BPF program before looking up a listening socket on the receive path. Program selects a listening socket to yield as result of socket lookup by calling bpf_sk_assign() helper and returning SK_PASS code. Program can revert its decision by assigning a NULL socket with bpf_sk_assign(). Alternatively, BPF program can also fail the lookup by returning with SK_DROP, or let the lookup continue as usual with SK_PASS on return, when no socket has been selected with bpf_sk_assign(). This lets the user match packets with listening sockets freely at the last possible point on the receive path, where we know that packets are destined for local delivery after undergoing policing, filtering, and routing. With BPF code selecting the socket, directing packets destined to an IP range or to a port range to a single socket becomes possible. In case multiple programs are attached, they are run in series in the order in which they were attached. The end result is determined from return codes of all the programs according to following rules: 1. If any program returned SK_PASS and selected a valid socket, the socket is used as result of socket lookup. 2. If more than one program returned SK_PASS and selected a socket, last selection takes effect. 3. If any program returned SK_DROP, and no program returned SK_PASS and selected a socket, socket lookup fails with -ECONNREFUSED. 4. If all programs returned SK_PASS and none of them selected a socket, socket lookup continues to htable-based lookup. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-5-jakub@cloudflare.com --- net/core/filter.c | 3 +++ net/ipv4/inet_hashtables.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d099436b3ff5..2bd129b5ae74 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9230,6 +9230,9 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; +DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled); +EXPORT_SYMBOL(bpf_sk_lookup_enabled); + BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx, struct sock *, sk, u64, flags) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index ab64834837c8..4eb4cd8d20dd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -299,6 +299,29 @@ static struct sock *inet_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -310,6 +333,14 @@ struct sock *__inet_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); -- cgit From 5df6531292b5021ac9e4ed261eb7d1fa9ff3bf08 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:26 +0200 Subject: inet6: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from inet6_lookup_listener as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-6-jakub@cloudflare.com --- net/ipv6/inet6_hashtables.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index fbe9d4295eac..03942eef8ab6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -111,6 +111,23 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 phash; + + if (sk->sk_reuseport) { + phash = inet6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, phash, skb, doff); + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, @@ -123,21 +140,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_connection_sock *icsk; struct sock *sk, *result = NULL; int score, hiscore = 0; - u32 phash = 0; inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { sk = (struct sock *)icsk; score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { - if (sk->sk_reuseport) { - phash = inet6_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, phash, - skb, doff); - if (result) - return result; - } + result = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; hiscore = score; } -- cgit From 1122702f02678597c4f1c7d316365ef502aafe08 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:27 +0200 Subject: inet6: Run SK_LOOKUP BPF program on socket lookup Following ipv4 stack changes, run a BPF program attached to netns before looking up a listening socket. Program can return a listening socket to use as result of socket lookup, fail the lookup, or take no action. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717103536.397595-7-jakub@cloudflare.com --- net/ipv6/inet6_hashtables.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'net') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 03942eef8ab6..2d3add9e6116 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,6 +21,8 @@ #include #include +extern struct inet_hashinfo tcp_hashinfo; + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) @@ -159,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet6_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -170,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); -- cgit From 7629c73a1466ec2348e9f64c874c19bf13f35f4c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:28 +0200 Subject: udp: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from __udp4_lib_lookup as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-8-jakub@cloudflare.com --- net/ipv4/udp.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 073d346f515c..9296faea3acf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -408,6 +408,25 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + /* Fall back to scoring if group has connections */ + if (reuseport_has_conns(sk, false)) + return NULL; + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, @@ -418,7 +437,6 @@ static struct sock *udp4_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = 0; @@ -426,15 +444,11 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp_ehashfn(net, daddr, hnum, - saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + if (result) + return result; + badness = score; result = sk; } -- cgit From 72f7e9440e9bd06f855b21eba09c1017395f430a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:29 +0200 Subject: udp: Run SK_LOOKUP BPF program on socket lookup Following INET/TCP socket lookup changes, modify UDP socket lookup to let BPF program select a receiving socket before searching for a socket by destination address and port as usual. Lookup of connected sockets that match packet 4-tuple is unaffected by this change. BPF program runs, and potentially overrides the lookup result, only if a 4-tuple match was not found. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-9-jakub@cloudflare.com --- net/ipv4/udp.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9296faea3acf..b738c63d7a77 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -456,6 +456,29 @@ static struct sock *udp4_lib_lookup2(struct net *net, return result; } +static inline struct sock *udp4_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + __be32 saddr, __be16 sport, + __be32 daddr, u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -463,27 +486,45 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *udptable, struct sk_buff *skb) { - struct sock *result; unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; + struct sock *result, *sk; hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard socket */ result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); - slot2 = hash2 & udptable->mask; - hslot2 = &udptable->hash2[slot2]; - - result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, sdif, - hslot2, skb); + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp4_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum); + if (sk) { + result = sk; + goto done; + } } + + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; + + /* Lookup wildcard sockets */ + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp4_lib_lookup2(net, saddr, sport, + htonl(INADDR_ANY), hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; -- cgit From 2a08748cd384cdfb8e1222bd3645a8d1d36e6a5d Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:30 +0200 Subject: udp6: Extract helper for selecting socket from reuseport group Prepare for calling into reuseport from __udp6_lib_lookup as well. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-10-jakub@cloudflare.com --- net/ipv6/udp.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 38c0d9350c6b..084205c18a33 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -141,6 +141,27 @@ static int compute_score(struct sock *sk, struct net *net, return score; } +static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum) +{ + struct sock *reuse_sk = NULL; + u32 hash; + + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { + hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + reuse_sk = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + /* Fall back to scoring if group has connections */ + if (reuseport_has_conns(sk, false)) + return NULL; + } + return reuse_sk; +} + /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -150,7 +171,6 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; int score, badness; - u32 hash = 0; result = NULL; badness = -1; @@ -158,16 +178,11 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport && - sk->sk_state != TCP_ESTABLISHED) { - hash = udp6_ehashfn(net, daddr, hnum, - saddr, sport); - - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; - } + result = lookup_reuseport(net, sk, skb, + saddr, sport, daddr, hnum); + if (result) + return result; + result = sk; badness = score; } -- cgit From 6d4201b1386b335bb53628a04ad63e547872dc5a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Fri, 17 Jul 2020 12:35:31 +0200 Subject: udp6: Run SK_LOOKUP BPF program on socket lookup Same as for udp4, let BPF program override the socket lookup result, by selecting a receiving socket of its choice or failing the lookup, if no connected UDP socket matched packet 4-tuple. Suggested-by: Marek Majkowski Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200717103536.397595-11-jakub@cloudflare.com --- net/ipv6/udp.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 084205c18a33..ff8be202726a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -190,6 +190,31 @@ static struct sock *udp6_lib_lookup2(struct net *net, return result; } +static inline struct sock *udp6_lookup_run_bpf(struct net *net, + struct udp_table *udptable, + struct sk_buff *skb, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool no_reuseport; + + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + + no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, + saddr, sport, daddr, hnum, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + + reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + return sk; +} + /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -200,25 +225,42 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2; struct udp_hslot *hslot2; - struct sock *result; + struct sock *result, *sk; hash2 = ipv6_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; + /* Lookup connected or non-wildcard sockets */ result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, sdif, hslot2, skb); - if (!result) { - hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); - slot2 = hash2 & udptable->mask; + if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED) + goto done; + + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp6_lookup_run_bpf(net, udptable, skb, + saddr, sport, daddr, hnum); + if (sk) { + result = sk; + goto done; + } + } - hslot2 = &udptable->hash2[slot2]; + /* Got non-wildcard socket or error on first lookup */ + if (result) + goto done; - result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, sdif, - hslot2, skb); - } + /* Lookup wildcard sockets */ + hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + result = udp6_lib_lookup2(net, saddr, sport, + &in6addr_any, hnum, dif, sdif, + hslot2, skb); +done: if (IS_ERR(result)) return NULL; return result; -- cgit From dfd5ec1ba602b1a9b36f9b6208f09546784932b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 19 Jul 2020 11:08:01 -0700 Subject: net: atm: lec_arpc.h: delete duplicated word Delete the doubled word "the" in a comment. Signed-off-by: Randy Dunlap Cc: "David S. Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/atm/lec_arpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h index 1205d8792d28..39115fe074c4 100644 --- a/net/atm/lec_arpc.h +++ b/net/atm/lec_arpc.h @@ -44,7 +44,7 @@ struct lec_arp_table { u8 *tlvs; u32 sizeoftlvs; /* * LANE2: Each MAC address can have TLVs - * associated with it. sizeoftlvs tells the + * associated with it. sizeoftlvs tells * the length of the tlvs array */ struct sk_buff_head tx_wait; /* wait queue for outgoing packets */ -- cgit From 089377b7e8e70f00c2290b00cb0024cc6e165a7d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 19 Jul 2020 11:08:24 -0700 Subject: net: rds: rdma_transport.h: delete duplicated word Delete the doubled word "be" in a comment. Signed-off-by: Randy Dunlap Cc: Santosh Shilimkar Cc: netdev@vger.kernel.org Cc: linux-rdma@vger.kernel.org Signed-off-by: David S. Miller --- net/rds/rdma_transport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index bfafd4a6d827..ca4c3a667091 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -13,7 +13,7 @@ /* Below reject reason is for legacy interoperability issue with non-linux * RDS endpoints where older version incompatibility is conveyed via value 1. - * For future version(s), proper encoded reject reason should be be used. + * For future version(s), proper encoded reject reason should be used. */ #define RDS_RDMA_REJ_INCOMPAT 1 -- cgit From a06d30ae7af492497ffbca6abf1621d508b8fcaa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:10 +0200 Subject: net/atm: remove the atmdev_ops {get, set}sockopt methods All implementations of these two methods are dummies that always return -EINVAL. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/atm/common.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 8575f5d52087..9b28f1fb3c69 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -782,13 +782,8 @@ int vcc_setsockopt(struct socket *sock, int level, int optname, vcc->atm_options &= ~ATM_ATMOPT_CLP; return 0; default: - if (level == SOL_SOCKET) - return -EINVAL; - break; - } - if (!vcc->dev || !vcc->dev->ops->setsockopt) return -EINVAL; - return vcc->dev->ops->setsockopt(vcc, level, optname, optval, optlen); + } } int vcc_getsockopt(struct socket *sock, int level, int optname, @@ -826,13 +821,8 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, return copy_to_user(optval, &pvc, sizeof(pvc)) ? -EFAULT : 0; } default: - if (level == SOL_SOCKET) - return -EINVAL; - break; - } - if (!vcc->dev || !vcc->dev->ops->getsockopt) return -EINVAL; - return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len); + } } int register_atmdevice_notifier(struct notifier_block *nb) -- cgit From 4a3672993f95c5f2ff0cf0951ba9a712d941a248 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:11 +0200 Subject: net: streamline __sys_setsockopt Return early when sockfd_lookup_light fails to reduce a level of indentation for most of the function body. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/socket.c | 57 +++++++++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 770503c4ca76..49a6daf0293b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2107,43 +2107,40 @@ static int __sys_setsockopt(int fd, int level, int optname, return -EINVAL; sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_setsockopt(sock, level, optname); - if (err) - goto out_put; + if (!sock) + return err; - err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, - &optname, optval, &optlen, - &kernel_optval); + err = security_socket_setsockopt(sock, level, optname); + if (err) + goto out_put; - if (err < 0) { - goto out_put; - } else if (err > 0) { - err = 0; - goto out_put; - } + err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, + optval, &optlen, &kernel_optval); + if (err < 0) + goto out_put; + if (err > 0) { + err = 0; + goto out_put; + } - if (kernel_optval) { - set_fs(KERNEL_DS); - optval = (char __user __force *)kernel_optval; - } + if (kernel_optval) { + set_fs(KERNEL_DS); + optval = (char __user __force *)kernel_optval; + } - if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) - err = - sock_setsockopt(sock, level, optname, optval, + if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, optlen); - else - err = - sock->ops->setsockopt(sock, level, optname, optval, - optlen); - if (kernel_optval) { - set_fs(oldfs); - kfree(kernel_optval); - } -out_put: - fput_light(sock->file, fput_needed); + if (kernel_optval) { + set_fs(oldfs); + kfree(kernel_optval); } + +out_put: + fput_light(sock->file, fput_needed); return err; } -- cgit From d8a9b38f83ea91341f80beb0a07a5777c6acf7a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:12 +0200 Subject: net: streamline __sys_getsockopt Return early when sockfd_lookup_light fails to reduce a level of indentation for most of the function body. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/socket.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 49a6daf0293b..b79376b17b45 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2163,28 +2163,25 @@ static int __sys_getsockopt(int fd, int level, int optname, int max_optlen; sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_getsockopt(sock, level, optname); - if (err) - goto out_put; + if (!sock) + return err; - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + err = security_socket_getsockopt(sock, level, optname); + if (err) + goto out_put; - if (level == SOL_SOCKET) - err = - sock_getsockopt(sock, level, optname, optval, + max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->getsockopt(sock, level, optname, optval, optlen); - else - err = - sock->ops->getsockopt(sock, level, optname, optval, - optlen); - err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, - optval, optlen, - max_optlen, err); + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, optval, + optlen, max_optlen, err); out_put: - fput_light(sock->file, fput_needed); - } + fput_light(sock->file, fput_needed); return err; } -- cgit From 4d295e54611509854a12c26f95a6f4430731d614 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:13 +0200 Subject: net: simplify cBPF setsockopt compat handling Add a helper that copies either a native or compat bpf_fprog from userspace after verifying the length, and remove the compat setsockopt handlers that now aren't required. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/compat.c | 45 +-------------------------------------------- net/core/filter.c | 23 +++++++++++++++++++++++ net/core/sock.c | 30 ++++++++++-------------------- net/packet/af_packet.c | 33 ++++----------------------------- 4 files changed, 38 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 5e3041a2c37d..3e6c2c5ff260 100644 --- a/net/compat.c +++ b/net/compat.c @@ -335,49 +335,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */ -struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval) -{ - struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval; - struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); - struct compat_sock_fprog f32; - struct sock_fprog f; - - if (copy_from_user(&f32, fprog32, sizeof(*fprog32))) - return NULL; - memset(&f, 0, sizeof(f)); - f.len = f32.len; - f.filter = compat_ptr(f32.filter); - if (copy_to_user(kfprog, &f, sizeof(struct sock_fprog))) - return NULL; - - return kfprog; -} -EXPORT_SYMBOL_GPL(get_compat_bpf_fprog); - -static int do_set_attach_filter(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct sock_fprog __user *kfprog; - - kfprog = get_compat_bpf_fprog(optval); - if (!kfprog) - return -EFAULT; - - return sock_setsockopt(sock, level, optname, (char __user *)kfprog, - sizeof(struct sock_fprog)); -} - -static int compat_sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (optname == SO_ATTACH_FILTER || - optname == SO_ATTACH_REUSEPORT_CBPF) - return do_set_attach_filter(sock, level, optname, - optval, optlen); - return sock_setsockopt(sock, level, optname, optval, optlen); -} - static int __compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { @@ -396,7 +353,7 @@ static int __compat_sys_setsockopt(int fd, int level, int optname, } if (level == SOL_SOCKET) - err = compat_sock_setsockopt(sock, level, + err = sock_setsockopt(sock, level, optname, optval, optlen); else if (sock->ops->compat_setsockopt) err = sock->ops->compat_setsockopt(sock, level, diff --git a/net/core/filter.c b/net/core/filter.c index bdd2382e655d..2bf6624796d8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,6 +77,29 @@ #include #include +int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len) +{ + if (in_compat_syscall()) { + struct compat_sock_fprog f32; + + if (len != sizeof(f32)) + return -EINVAL; + if (copy_from_user(&f32, src, sizeof(f32))) + return -EFAULT; + memset(dst, 0, sizeof(*dst)); + dst->len = f32.len; + dst->filter = compat_ptr(f32.filter); + } else { + if (len != sizeof(*dst)) + return -EINVAL; + if (copy_from_user(dst, src, sizeof(*dst))) + return -EFAULT; + } + + return 0; +} +EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); + /** * sk_filter_trim_cap - run a packet through a socket filter * @sk: sock associated with &sk_buff diff --git a/net/core/sock.c b/net/core/sock.c index 11d6f77dd562..e085df794825 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1059,19 +1059,14 @@ set_sndbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); break; - case SO_ATTACH_FILTER: - ret = -EINVAL; - if (optlen == sizeof(struct sock_fprog)) { - struct sock_fprog fprog; - - ret = -EFAULT; - if (copy_from_user(&fprog, optval, sizeof(fprog))) - break; + case SO_ATTACH_FILTER: { + struct sock_fprog fprog; + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + if (!ret) ret = sk_attach_filter(&fprog, sk); - } break; - + } case SO_ATTACH_BPF: ret = -EINVAL; if (optlen == sizeof(u32)) { @@ -1085,19 +1080,14 @@ set_sndbuf: } break; - case SO_ATTACH_REUSEPORT_CBPF: - ret = -EINVAL; - if (optlen == sizeof(struct sock_fprog)) { - struct sock_fprog fprog; - - ret = -EFAULT; - if (copy_from_user(&fprog, optval, sizeof(fprog))) - break; + case SO_ATTACH_REUSEPORT_CBPF: { + struct sock_fprog fprog; + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + if (!ret) ret = sk_reuseport_attach_filter(&fprog, sk); - } break; - + } case SO_ATTACH_REUSEPORT_EBPF: ret = -EINVAL; if (optlen == sizeof(u32)) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 781fee93b7d5..35aee9e98053 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1545,10 +1545,10 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) return -EPERM; - if (len != sizeof(fprog)) - return -EINVAL; - if (copy_from_user(&fprog, data, len)) - return -EFAULT; + + ret = copy_bpf_fprog_from_user(&fprog, data, len); + if (ret) + return ret; ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) @@ -4040,28 +4040,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, return 0; } - -#ifdef CONFIG_COMPAT -static int compat_packet_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct packet_sock *po = pkt_sk(sock->sk); - - if (level != SOL_PACKET) - return -ENOPROTOOPT; - - if (optname == PACKET_FANOUT_DATA && - po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { - optval = (char __user *)get_compat_bpf_fprog(optval); - if (!optval) - return -EFAULT; - optlen = sizeof(struct sock_fprog); - } - - return packet_setsockopt(sock, level, optname, optval, optlen); -} -#endif - static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { @@ -4549,9 +4527,6 @@ static const struct proto_ops packet_ops = { .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_packet_setsockopt, -#endif .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, -- cgit From 8c918ffbbad49454ed26c53eb1b90bf98bb5e394 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:14 +0200 Subject: net: remove compat_sock_common_{get,set}sockopt Add the compat handling to sock_common_{get,set}sockopt instead, keyed of in_compat_syscall(). This allow to remove the now unused ->compat_{get,set}sockopt methods from struct proto_ops. Signed-off-by: Christoph Hellwig Acked-by: Matthieu Baerts Acked-by: Stefan Schmidt Signed-off-by: David S. Miller --- net/core/sock.c | 30 ++++++------------------------ net/dccp/ipv4.c | 4 ---- net/dccp/ipv6.c | 2 -- net/ieee802154/socket.c | 8 -------- net/ipv4/af_inet.c | 6 ------ net/ipv6/af_inet6.c | 4 ---- net/ipv6/ipv6_sockglue.c | 12 ++---------- net/ipv6/raw.c | 2 -- net/l2tp/l2tp_ip.c | 4 ---- net/l2tp/l2tp_ip6.c | 2 -- net/mptcp/protocol.c | 6 ------ net/phonet/socket.c | 8 -------- net/sctp/ipv6.c | 2 -- net/sctp/protocol.c | 4 ---- 14 files changed, 8 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index e085df794825..018404d17626 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3199,23 +3199,14 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(sock_common_getsockopt); - #ifdef CONFIG_COMPAT -int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->compat_getsockopt != NULL) + if (in_compat_syscal() && sk->sk_prot->compat_getsockopt) return sk->sk_prot->compat_getsockopt(sk, level, optname, optval, optlen); +#endif return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(compat_sock_common_getsockopt); -#endif +EXPORT_SYMBOL(sock_common_getsockopt); int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -3240,23 +3231,14 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(sock_common_setsockopt); - #ifdef CONFIG_COMPAT -int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - - if (sk->sk_prot->compat_setsockopt != NULL) + if (in_compat_syscall() && sk->sk_prot->compat_setsockopt) return sk->sk_prot->compat_setsockopt(sk, level, optname, optval, optlen); +#endif return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); } -EXPORT_SYMBOL(compat_sock_common_setsockopt); -#endif +EXPORT_SYMBOL(sock_common_setsockopt); void sk_common_release(struct sock *sk) { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a7e989919c53..316cc5ac0da7 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -999,10 +999,6 @@ static const struct proto_ops inet_dccp_ops = { .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; static struct inet_protosw dccp_v4_protosw = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 650187d68851..b50f85a72cd5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1083,8 +1083,6 @@ static const struct proto_ops inet6_dccp_ops = { .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index d93d4531aa9b..94ae9662133e 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -423,10 +423,6 @@ static const struct proto_ops ieee802154_raw_ops = { .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; /* DGRAM Sockets (802.15.4 dataframes) */ @@ -986,10 +982,6 @@ static const struct proto_ops ieee802154_dgram_ops = { .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; /* Create a socket. Initialise the socket, blank the addresses diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ff141d630bdf..4307503a6f0b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1043,8 +1043,6 @@ const struct proto_ops inet_stream_ops = { .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, @@ -1073,8 +1071,6 @@ const struct proto_ops inet_dgram_ops = { .sendpage = inet_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif }; @@ -1105,8 +1101,6 @@ static const struct proto_ops inet_sockraw_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b304b882e031..0306509ab063 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -688,8 +688,6 @@ const struct proto_ops inet6_stream_ops = { .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif .set_rcvlowat = tcp_set_rcvlowat, }; @@ -717,8 +715,6 @@ const struct proto_ops inet6_dgram_ops = { .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20576e87a5f7..6ab44ec2c369 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -914,12 +914,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, { int err; - if (level == SOL_IP && sk->sk_type != SOCK_RAW) { - if (udp_prot.compat_setsockopt != NULL) - return udp_prot.compat_setsockopt(sk, level, optname, - optval, optlen); + if (level == SOL_IP && sk->sk_type != SOCK_RAW) return udp_prot.setsockopt(sk, level, optname, optval, optlen); - } if (level != SOL_IPV6) return -ENOPROTOOPT; @@ -1480,12 +1476,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, { int err; - if (level == SOL_IP && sk->sk_type != SOCK_RAW) { - if (udp_prot.compat_getsockopt != NULL) - return udp_prot.compat_getsockopt(sk, level, optname, - optval, optlen); + if (level == SOL_IP && sk->sk_type != SOCK_RAW) return udp_prot.getsockopt(sk, level, optname, optval, optlen); - } if (level != SOL_IPV6) return -ENOPROTOOPT; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8ef5a7b30524..e23c6b461758 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1378,8 +1378,6 @@ const struct proto_ops inet6_sockraw_ops = { .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 955662a6dee7..f8d7412cfb3d 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -638,10 +638,6 @@ static const struct proto_ops l2tp_ip_ops = { .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; static struct inet_protosw l2tp_ip_protosw = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 526ed2c24dd5..2cdc0b7a7a43 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -773,8 +773,6 @@ static const struct proto_ops l2tp_ip6_ops = { .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dbe43e0cd734..f0b0b503c262 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2166,10 +2166,6 @@ static const struct proto_ops mptcp_stream_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; static struct inet_protosw mptcp_protosw = { @@ -2222,8 +2218,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 76d499f6af9a..87c60f83c180 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -441,10 +441,6 @@ const struct proto_ops phonet_dgram_ops = { .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, -#ifdef CONFIG_COMPAT - .compat_setsockopt = sock_no_setsockopt, - .compat_getsockopt = sock_no_getsockopt, -#endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, @@ -466,10 +462,6 @@ const struct proto_ops phonet_stream_ops = { .shutdown = sock_no_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ccfa0ab3e7f4..ebda31b7747d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1033,8 +1033,6 @@ static const struct proto_ops inet6_seqpacket_ops = { .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, #endif }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index cde29f3c7fb3..8d25cc464efd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1036,10 +1036,6 @@ static const struct proto_ops inet_seqpacket_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif }; /* Registration with AF_INET family. */ -- cgit From 55db9c0e853421fa71cac5e6855898601f78a1f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:15 +0200 Subject: net: remove compat_sys_{get,set}sockopt Now that the ->compat_{get,set}sockopt proto_ops methods are gone there is no good reason left to keep the compat syscalls separate. This fixes the odd use of unsigned int for the compat_setsockopt optlen and the missing sock_use_custom_sol_socket. It would also easily allow running the eBPF hooks for the compat syscalls, but such a large change in behavior does not belong into a consolidation patch like this one. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/compat.c | 79 +++--------------------------------------------------------- net/socket.c | 25 ++++++++++--------- 2 files changed, 17 insertions(+), 87 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 3e6c2c5ff260..091875bd6210 100644 --- a/net/compat.c +++ b/net/compat.c @@ -335,77 +335,6 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -static int __compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen) -{ - int err; - struct socket *sock; - - if (optlen > INT_MAX) - return -EINVAL; - - sock = sockfd_lookup(fd, &err); - if (sock) { - err = security_socket_setsockopt(sock, level, optname); - if (err) { - sockfd_put(sock); - return err; - } - - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, - optname, optval, optlen); - else if (sock->ops->compat_setsockopt) - err = sock->ops->compat_setsockopt(sock, level, - optname, optval, optlen); - else - err = sock->ops->setsockopt(sock, level, - optname, optval, optlen); - sockfd_put(sock); - } - return err; -} - -COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, - char __user *, optval, unsigned int, optlen) -{ - return __compat_sys_setsockopt(fd, level, optname, optval, optlen); -} - -static int __compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, - int __user *optlen) -{ - int err; - struct socket *sock = sockfd_lookup(fd, &err); - - if (sock) { - err = security_socket_getsockopt(sock, level, optname); - if (err) { - sockfd_put(sock); - return err; - } - - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, - optname, optval, optlen); - else if (sock->ops->compat_getsockopt) - err = sock->ops->compat_getsockopt(sock, level, - optname, optval, optlen); - else - err = sock->ops->getsockopt(sock, level, - optname, optval, optlen); - sockfd_put(sock); - } - return err; -} - -COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, - char __user *, optval, int __user *, optlen) -{ - return __compat_sys_getsockopt(fd, level, optname, optval, optlen); -} - /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { @@ -565,13 +494,11 @@ COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) ret = __sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: - ret = __compat_sys_setsockopt(a0, a1, a[2], - compat_ptr(a[3]), a[4]); + ret = __sys_setsockopt(a0, a1, a[2], compat_ptr(a[3]), a[4]); break; case SYS_GETSOCKOPT: - ret = __compat_sys_getsockopt(a0, a1, a[2], - compat_ptr(a[3]), - compat_ptr(a[4])); + ret = __sys_getsockopt(a0, a1, a[2], compat_ptr(a[3]), + compat_ptr(a[4])); break; case SYS_SENDMSG: ret = __compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); diff --git a/net/socket.c b/net/socket.c index b79376b17b45..dec345982abb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2094,9 +2094,8 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) * Set a socket option. Because we don't know the option lengths we have * to pass the user mode parameter for the protocols to sort out. */ - -static int __sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +int __sys_setsockopt(int fd, int level, int optname, char __user *optval, + int optlen) { mm_segment_t oldfs = get_fs(); char *kernel_optval = NULL; @@ -2114,8 +2113,10 @@ static int __sys_setsockopt(int fd, int level, int optname, if (err) goto out_put; - err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, - optval, &optlen, &kernel_optval); + if (!in_compat_syscall()) + err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, + optval, &optlen, + &kernel_optval); if (err < 0) goto out_put; if (err > 0) { @@ -2154,9 +2155,8 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. */ - -static int __sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +int __sys_getsockopt(int fd, int level, int optname, char __user *optval, + int __user *optlen) { int err, fput_needed; struct socket *sock; @@ -2170,7 +2170,8 @@ static int __sys_getsockopt(int fd, int level, int optname, if (err) goto out_put; - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + if (!in_compat_syscall()) + max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); if (level == SOL_SOCKET) err = sock_getsockopt(sock, level, optname, optval, optlen); @@ -2178,8 +2179,10 @@ static int __sys_getsockopt(int fd, int level, int optname, err = sock->ops->getsockopt(sock, level, optname, optval, optlen); - err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, optval, - optlen, max_optlen, err); + if (!in_compat_syscall()) + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, + optval, optlen, max_optlen, + err); out_put: fput_light(sock->file, fput_needed); return err; -- cgit From 983094b4fc2d6a0b356f189593970cea2682a20c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:16 +0200 Subject: netfilter/arp_tables: clean up compat {get, set}sockopt handling Merge the native and compat {get,set}sockopt handlers using in_compat_syscall(). Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 85 ++++++++++------------------------------- 1 file changed, 21 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b167f4a5b684..15807fb4a65f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -787,8 +787,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, - const int *len, int compat) +static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -802,7 +801,7 @@ static int get_info(struct net *net, void __user *user, name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_lock(NFPROTO_ARP); #endif t = xt_request_find_table_lock(net, NFPROTO_ARP, name); @@ -812,7 +811,7 @@ static int get_info(struct net *net, void __user *user, #ifdef CONFIG_COMPAT struct xt_table_info tmp; - if (compat) { + if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; @@ -837,7 +836,7 @@ static int get_info(struct net *net, void __user *user, } else ret = PTR_ERR(t); #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_unlock(NFPROTO_ARP); #endif return ret; @@ -998,7 +997,7 @@ static int do_replace(struct net *net, const void __user *user, } static int do_add_counters(struct net *net, const void __user *user, - unsigned int len, int compat) + unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1009,7 +1008,8 @@ static int do_add_counters(struct net *net, const void __user *user, struct arpt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + paddc = xt_copy_counters_from_user(user, len, &tmp, + in_compat_syscall()); if (IS_ERR(paddc)) return PTR_ERR(paddc); @@ -1294,30 +1294,6 @@ static int compat_do_replace(struct net *net, void __user *user, return ret; } -static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, - unsigned int len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case ARPT_SO_SET_REPLACE: - ret = compat_do_replace(sock_net(sk), user, len); - break; - - case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 1); - break; - - default: - ret = -EINVAL; - } - - return ret; -} - static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, compat_uint_t *size, struct xt_counters *counters, @@ -1425,29 +1401,6 @@ static int compat_get_entries(struct net *net, xt_compat_unlock(NFPROTO_ARP); return ret; } - -static int do_arpt_get_ctl(struct sock *, int, void __user *, int *); - -static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, - int *len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case ARPT_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 1); - break; - case ARPT_SO_GET_ENTRIES: - ret = compat_get_entries(sock_net(sk), user, len); - break; - default: - ret = do_arpt_get_ctl(sk, cmd, user, len); - } - return ret; -} #endif static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) @@ -1459,11 +1412,16 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = do_replace(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(sock_net(sk), user, len); + else +#endif + ret = do_replace(sock_net(sk), user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 0); + ret = do_add_counters(sock_net(sk), user, len); break; default: @@ -1482,11 +1440,16 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 0); + ret = get_info(sock_net(sk), user, len); break; case ARPT_SO_GET_ENTRIES: - ret = get_entries(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_get_entries(sock_net(sk), user, len); + else +#endif + ret = get_entries(sock_net(sk), user, len); break; case ARPT_SO_GET_REVISION_TARGET: { @@ -1610,15 +1573,9 @@ static struct nf_sockopt_ops arpt_sockopts = { .set_optmin = ARPT_BASE_CTL, .set_optmax = ARPT_SO_SET_MAX+1, .set = do_arpt_set_ctl, -#ifdef CONFIG_COMPAT - .compat_set = compat_do_arpt_set_ctl, -#endif .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, -#ifdef CONFIG_COMPAT - .compat_get = compat_do_arpt_get_ctl, -#endif .owner = THIS_MODULE, }; -- cgit From 89c53c14e4d218ca45b47402d991e82b77875888 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:17 +0200 Subject: netfilter/ip_tables: clean up compat {get,set}sockopt handling Merge the native and compat {get,set}sockopt handlers using in_compat_syscall(). Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_tables.c | 86 +++++++++++------------------------------- 1 file changed, 21 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5bf9fa06aee0..fbfad38f3979 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -944,8 +944,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, - const int *len, int compat) +static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -959,7 +958,7 @@ static int get_info(struct net *net, void __user *user, name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_lock(AF_INET); #endif t = xt_request_find_table_lock(net, AF_INET, name); @@ -969,7 +968,7 @@ static int get_info(struct net *net, void __user *user, #ifdef CONFIG_COMPAT struct xt_table_info tmp; - if (compat) { + if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET); private = &tmp; @@ -995,7 +994,7 @@ static int get_info(struct net *net, void __user *user, } else ret = PTR_ERR(t); #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_unlock(AF_INET); #endif return ret; @@ -1153,7 +1152,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) static int do_add_counters(struct net *net, const void __user *user, - unsigned int len, int compat) + unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1164,7 +1163,8 @@ do_add_counters(struct net *net, const void __user *user, struct ipt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + paddc = xt_copy_counters_from_user(user, len, &tmp, + in_compat_syscall()); if (IS_ERR(paddc)) return PTR_ERR(paddc); @@ -1534,31 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -static int -compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, - unsigned int len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case IPT_SO_SET_REPLACE: - ret = compat_do_replace(sock_net(sk), user, len); - break; - - case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 1); - break; - - default: - ret = -EINVAL; - } - - return ret; -} - struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; @@ -1634,29 +1609,6 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_unlock(AF_INET); return ret; } - -static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); - -static int -compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case IPT_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 1); - break; - case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(sock_net(sk), user, len); - break; - default: - ret = do_ipt_get_ctl(sk, cmd, user, len); - } - return ret; -} #endif static int @@ -1669,11 +1621,16 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(sock_net(sk), user, len); + else +#endif + ret = do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 0); + ret = do_add_counters(sock_net(sk), user, len); break; default: @@ -1693,11 +1650,16 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 0); + ret = get_info(sock_net(sk), user, len); break; case IPT_SO_GET_ENTRIES: - ret = get_entries(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_get_entries(sock_net(sk), user, len); + else +#endif + ret = get_entries(sock_net(sk), user, len); break; case IPT_SO_GET_REVISION_MATCH: @@ -1886,15 +1848,9 @@ static struct nf_sockopt_ops ipt_sockopts = { .set_optmin = IPT_BASE_CTL, .set_optmax = IPT_SO_SET_MAX+1, .set = do_ipt_set_ctl, -#ifdef CONFIG_COMPAT - .compat_set = compat_do_ipt_set_ctl, -#endif .get_optmin = IPT_BASE_CTL, .get_optmax = IPT_SO_GET_MAX+1, .get = do_ipt_get_ctl, -#ifdef CONFIG_COMPAT - .compat_get = compat_do_ipt_get_ctl, -#endif .owner = THIS_MODULE, }; -- cgit From f415e76fd723594ed878a5a79a0ec36eca81c312 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:18 +0200 Subject: netfilter/ip6_tables: clean up compat {get, set}sockopt handling Merge the native and compat {get,set}sockopt handlers using in_compat_syscall(). Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 87 ++++++++++------------------------------- 1 file changed, 21 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e96a431549bc..96c48e91e6c7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -960,8 +960,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(struct net *net, void __user *user, - const int *len, int compat) +static int get_info(struct net *net, void __user *user, const int *len) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -975,7 +974,7 @@ static int get_info(struct net *net, void __user *user, name[XT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_lock(AF_INET6); #endif t = xt_request_find_table_lock(net, AF_INET6, name); @@ -985,7 +984,7 @@ static int get_info(struct net *net, void __user *user, #ifdef CONFIG_COMPAT struct xt_table_info tmp; - if (compat) { + if (in_compat_syscall()) { ret = compat_table_info(private, &tmp); xt_compat_flush_offsets(AF_INET6); private = &tmp; @@ -1011,7 +1010,7 @@ static int get_info(struct net *net, void __user *user, } else ret = PTR_ERR(t); #ifdef CONFIG_COMPAT - if (compat) + if (in_compat_syscall()) xt_compat_unlock(AF_INET6); #endif return ret; @@ -1169,8 +1168,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) } static int -do_add_counters(struct net *net, const void __user *user, unsigned int len, - int compat) +do_add_counters(struct net *net, const void __user *user, unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1181,7 +1179,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct ip6t_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, compat); + paddc = xt_copy_counters_from_user(user, len, &tmp, + in_compat_syscall()); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); @@ -1543,31 +1542,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -static int -compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, - unsigned int len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case IP6T_SO_SET_REPLACE: - ret = compat_do_replace(sock_net(sk), user, len); - break; - - case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 1); - break; - - default: - ret = -EINVAL; - } - - return ret; -} - struct compat_ip6t_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; @@ -1643,29 +1617,6 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_unlock(AF_INET6); return ret; } - -static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *); - -static int -compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) -{ - int ret; - - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case IP6T_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 1); - break; - case IP6T_SO_GET_ENTRIES: - ret = compat_get_entries(sock_net(sk), user, len); - break; - default: - ret = do_ip6t_get_ctl(sk, cmd, user, len); - } - return ret; -} #endif static int @@ -1678,11 +1629,16 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = do_replace(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(sock_net(sk), user, len); + else +#endif + ret = do_replace(sock_net(sk), user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len, 0); + ret = do_add_counters(sock_net(sk), user, len); break; default: @@ -1702,11 +1658,16 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(sock_net(sk), user, len, 0); + ret = get_info(sock_net(sk), user, len); break; case IP6T_SO_GET_ENTRIES: - ret = get_entries(sock_net(sk), user, len); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_get_entries(sock_net(sk), user, len); + else +#endif + ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: @@ -1897,15 +1858,9 @@ static struct nf_sockopt_ops ip6t_sockopts = { .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, -#ifdef CONFIG_COMPAT - .compat_set = compat_do_ip6t_set_ctl, -#endif .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, -#ifdef CONFIG_COMPAT - .compat_get = compat_do_ip6t_get_ctl, -#endif .owner = THIS_MODULE, }; -- cgit From fc66de8e16ec4d9b4d2696d961de4f81db78a1b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:19 +0200 Subject: netfilter/ebtables: clean up compat {get, set}sockopt handling Merge the native and compat {get,set}sockopt handlers using in_compat_syscall(). Note that this required moving a fair amout of code around to be done sanely. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 214 ++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 116 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c83ffe912163..fe13108af1f5 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1451,86 +1451,6 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, ebt_entry_to_user, entries, tmp.entries); } -static int do_ebt_set_ctl(struct sock *sk, - int cmd, void __user *user, unsigned int len) -{ - int ret; - struct net *net = sock_net(sk); - - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case EBT_SO_SET_ENTRIES: - ret = do_replace(net, user, len); - break; - case EBT_SO_SET_COUNTERS: - ret = update_counters(net, user, len); - break; - default: - ret = -EINVAL; - } - return ret; -} - -static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) -{ - int ret; - struct ebt_replace tmp; - struct ebt_table *t; - struct net *net = sock_net(sk); - - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(&tmp, user, sizeof(tmp))) - return -EFAULT; - - tmp.name[sizeof(tmp.name) - 1] = '\0'; - - t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); - if (!t) - return ret; - - switch (cmd) { - case EBT_SO_GET_INFO: - case EBT_SO_GET_INIT_INFO: - if (*len != sizeof(struct ebt_replace)) { - ret = -EINVAL; - mutex_unlock(&ebt_mutex); - break; - } - if (cmd == EBT_SO_GET_INFO) { - tmp.nentries = t->private->nentries; - tmp.entries_size = t->private->entries_size; - tmp.valid_hooks = t->valid_hooks; - } else { - tmp.nentries = t->table->nentries; - tmp.entries_size = t->table->entries_size; - tmp.valid_hooks = t->table->valid_hooks; - } - mutex_unlock(&ebt_mutex); - if (copy_to_user(user, &tmp, *len) != 0) { - ret = -EFAULT; - break; - } - ret = 0; - break; - - case EBT_SO_GET_ENTRIES: - case EBT_SO_GET_INIT_ENTRIES: - ret = copy_everything_to_user(t, user, len, cmd); - mutex_unlock(&ebt_mutex); - break; - - default: - mutex_unlock(&ebt_mutex); - ret = -EINVAL; - } - - return ret; -} - #ifdef CONFIG_COMPAT /* 32 bit-userspace compatibility definitions. */ struct compat_ebt_replace { @@ -2314,28 +2234,6 @@ static int compat_update_counters(struct net *net, void __user *user, hlp.num_counters, user, len); } -static int compat_do_ebt_set_ctl(struct sock *sk, - int cmd, void __user *user, unsigned int len) -{ - int ret; - struct net *net = sock_net(sk); - - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - switch (cmd) { - case EBT_SO_SET_ENTRIES: - ret = compat_do_replace(net, user, len); - break; - case EBT_SO_SET_COUNTERS: - ret = compat_update_counters(net, user, len); - break; - default: - ret = -EINVAL; - } - return ret; -} - static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { @@ -2344,14 +2242,6 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, struct ebt_table *t; struct net *net = sock_net(sk); - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - - /* try real handler in case userland supplied needed padding */ - if ((cmd == EBT_SO_GET_INFO || - cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp)) - return do_ebt_get_ctl(sk, cmd, user, len); - if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; @@ -2413,20 +2303,112 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, } #endif +static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + struct net *net = sock_net(sk); + struct ebt_replace tmp; + struct ebt_table *t; + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +#ifdef CONFIG_COMPAT + /* try real handler in case userland supplied needed padding */ + if (in_compat_syscall() && + ((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) || + *len != sizeof(tmp))) + return compat_do_ebt_get_ctl(sk, cmd, user, len); +#endif + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + tmp.name[sizeof(tmp.name) - 1] = '\0'; + + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + switch (cmd) { + case EBT_SO_GET_INFO: + case EBT_SO_GET_INIT_INFO: + if (*len != sizeof(struct ebt_replace)) { + ret = -EINVAL; + mutex_unlock(&ebt_mutex); + break; + } + if (cmd == EBT_SO_GET_INFO) { + tmp.nentries = t->private->nentries; + tmp.entries_size = t->private->entries_size; + tmp.valid_hooks = t->valid_hooks; + } else { + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + } + mutex_unlock(&ebt_mutex); + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + ret = copy_everything_to_user(t, user, len, cmd); + mutex_unlock(&ebt_mutex); + break; + + default: + mutex_unlock(&ebt_mutex); + ret = -EINVAL; + } + + return ret; +} + +static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) +{ + struct net *net = sock_net(sk); + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case EBT_SO_SET_ENTRIES: +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_do_replace(net, user, len); + else +#endif + ret = do_replace(net, user, len); + break; + case EBT_SO_SET_COUNTERS: +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = compat_update_counters(net, user, len); + else +#endif + ret = update_counters(net, user, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + static struct nf_sockopt_ops ebt_sockopts = { .pf = PF_INET, .set_optmin = EBT_BASE_CTL, .set_optmax = EBT_SO_SET_MAX + 1, .set = do_ebt_set_ctl, -#ifdef CONFIG_COMPAT - .compat_set = compat_do_ebt_set_ctl, -#endif .get_optmin = EBT_BASE_CTL, .get_optmax = EBT_SO_GET_MAX + 1, .get = do_ebt_get_ctl, -#ifdef CONFIG_COMPAT - .compat_get = compat_do_ebt_get_ctl, -#endif .owner = THIS_MODULE, }; -- cgit From 77d4df41d53e5c2af14db26f20fe50da52e382ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:20 +0200 Subject: netfilter: remove the compat_{get,set} methods All instances handle compat sockopts via in_compat_syscall() now, so remove the compat_{get,set} methods as well as the compat_nf_{get,set}sockopt wrappers. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 5 ++--- net/ipv6/ipv6_sockglue.c | 5 ++--- net/netfilter/nf_sockopt.c | 42 ------------------------------------------ 3 files changed, 4 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84ec3703c909..95f4248c6fc5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1446,8 +1446,7 @@ mc_msf_out: optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && !ip_mroute_opt(optname)) - err = compat_nf_setsockopt(sk, PF_INET, optname, optval, - optlen); + err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); #endif return err; } @@ -1821,7 +1820,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); + err = nf_getsockopt(sk, PF_INET, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6ab44ec2c369..6adfbdcb7979 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1030,8 +1030,7 @@ mc_msf_out: /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && optname != IPV6_XFRM_POLICY) - err = compat_nf_setsockopt(sk, PF_INET6, optname, optval, - optlen); + err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } @@ -1531,7 +1530,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len); + err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 46cb3786e0ec..02870993d335 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -122,45 +122,3 @@ int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, return nf_sockopt(sk, pf, val, opt, len, 1); } EXPORT_SYMBOL(nf_getsockopt); - -#ifdef CONFIG_COMPAT -static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val, - char __user *opt, int *len, int get) -{ - struct nf_sockopt_ops *ops; - int ret; - - ops = nf_sockopt_find(sk, pf, val, get); - if (IS_ERR(ops)) - return PTR_ERR(ops); - - if (get) { - if (ops->compat_get) - ret = ops->compat_get(sk, val, opt, len); - else - ret = ops->get(sk, val, opt, len); - } else { - if (ops->compat_set) - ret = ops->compat_set(sk, val, opt, *len); - else - ret = ops->set(sk, val, opt, *len); - } - - module_put(ops->owner); - return ret; -} - -int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, - int val, char __user *opt, unsigned int len) -{ - return compat_nf_sockopt(sk, pf, val, opt, &len, 0); -} -EXPORT_SYMBOL(compat_nf_setsockopt); - -int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, - int val, char __user *opt, int *len) -{ - return compat_nf_sockopt(sk, pf, val, opt, len, 1); -} -EXPORT_SYMBOL(compat_nf_getsockopt); -#endif -- cgit From c34bc10d2535719ddf77d44ee849f6c7589583ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:21 +0200 Subject: netfilter: remove the compat argument to xt_copy_counters_from_user Lift the in_compat_syscall() from the callers instead. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 3 +-- net/ipv4/netfilter/ip_tables.c | 3 +-- net/ipv6/netfilter/ip6_tables.c | 3 +-- net/netfilter/x_tables.c | 9 ++++----- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 15807fb4a65f..2c8a4dad39d7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1008,8 +1008,7 @@ static int do_add_counters(struct net *net, const void __user *user, struct arpt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, - in_compat_syscall()); + paddc = xt_copy_counters_from_user(user, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fbfad38f3979..161901dd1cae 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1163,8 +1163,7 @@ do_add_counters(struct net *net, const void __user *user, struct ipt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, - in_compat_syscall()); + paddc = xt_copy_counters_from_user(user, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 96c48e91e6c7..fd1f8f931231 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1179,8 +1179,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len) struct ip6t_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp, - in_compat_syscall()); + paddc = xt_copy_counters_from_user(user, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 99a468be4a59..32bab45af7e4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1033,15 +1033,14 @@ EXPORT_SYMBOL_GPL(xt_check_target); * @user: src pointer to userspace memory * @len: alleged size of userspace memory * @info: where to store the xt_counters_info metadata - * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel * * Copies counter meta data from @user and stores it in @info. * * vmallocs memory to hold the counters, then copies the counter data * from @user to the new memory and returns a pointer to it. * - * If @compat is true, @info gets converted automatically to the 64bit - * representation. + * If called from a compat syscall, @info gets converted automatically to the + * 64bit representation. * * The metadata associated with the counters is stored in @info. * @@ -1049,13 +1048,13 @@ EXPORT_SYMBOL_GPL(xt_check_target); * If IS_ERR is false, caller has to vfree the pointer. */ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, - struct xt_counters_info *info, bool compat) + struct xt_counters_info *info) { void *mem; u64 size; #ifdef CONFIG_COMPAT - if (compat) { + if (in_compat_syscall()) { /* structures only differ in size due to alignment */ struct compat_xt_counters_info compat_tmp; -- cgit From 657e4c34a2370da1e82bbfc0d253ad394056b19e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:22 +0200 Subject: netfilter: split nf_sockopt Split nf_sockopt into a getsockopt and setsockopt side as they share very little code. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/netfilter/nf_sockopt.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 02870993d335..90469b1f628a 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -89,36 +89,32 @@ out: return ops; } -/* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, u_int8_t pf, int val, - char __user *opt, int *len, int get) +int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, + unsigned int len) { struct nf_sockopt_ops *ops; int ret; - ops = nf_sockopt_find(sk, pf, val, get); + ops = nf_sockopt_find(sk, pf, val, 0); if (IS_ERR(ops)) return PTR_ERR(ops); - - if (get) - ret = ops->get(sk, val, opt, len); - else - ret = ops->set(sk, val, opt, *len); - + ret = ops->set(sk, val, opt, len); module_put(ops->owner); return ret; } - -int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, - unsigned int len) -{ - return nf_sockopt(sk, pf, val, opt, &len, 0); -} EXPORT_SYMBOL(nf_setsockopt); int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len) { - return nf_sockopt(sk, pf, val, opt, len, 1); + struct nf_sockopt_ops *ops; + int ret; + + ops = nf_sockopt_find(sk, pf, val, 1); + if (IS_ERR(ops)) + return PTR_ERR(ops); + ret = ops->get(sk, val, opt, len); + module_put(ops->owner); + return ret; } EXPORT_SYMBOL(nf_getsockopt); -- cgit From 49e74c24f31048b4faa3cfcccb93f444abb0b910 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:23 +0200 Subject: net/ipv4: factor out MCAST_MSFILTER getsockopt helpers Factor out one helper each for getting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 131 ++++++++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 95f4248c6fc5..70d32c9476a2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1468,6 +1468,74 @@ static bool getsockopt_needs_rtnl(int optname) return false; } +static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, + int __user *optlen, int len) +{ + const int size0 = offsetof(struct group_filter, gf_slist); + struct group_filter __user *p = optval; + struct group_filter gsf; + int num; + int err; + + if (len < size0) + return -EINVAL; + if (copy_from_user(&gsf, p, size0)) + return -EFAULT; + + num = gsf.gf_numsrc; + err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + if (err) + return err; + if (gsf.gf_numsrc < num) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_COMPAT +static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, + int __user *optlen) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = optval; + struct compat_group_filter gf32; + struct group_filter gf; + int len, err; + int num; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + rtnl_lock(); + lock_sock(sk); + err = ip_mc_gsfget(sk, &gf, p->gf_slist); + release_sock(sk); + rtnl_unlock(); + if (err) + return err; + if (gf.gf_numsrc < num) + num = gf.gf_numsrc; + len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); + if (put_user(len, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; +} +#endif + static int do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen, unsigned int flags) { @@ -1626,31 +1694,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, goto out; } case MCAST_MSFILTER: - { - struct group_filter __user *p = (void __user *)optval; - struct group_filter gsf; - const int size0 = offsetof(struct group_filter, gf_slist); - int num; - - if (len < size0) { - err = -EINVAL; - goto out; - } - if (copy_from_user(&gsf, p, size0)) { - err = -EFAULT; - goto out; - } - num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); - if (err) - goto out; - if (gsf.gf_numsrc < num) - num = gsf.gf_numsrc; - if (put_user(GROUP_FILTER_SIZE(num), optlen) || - copy_to_user(p, &gsf, size0)) - err = -EFAULT; + err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; - } case IP_MULTICAST_ALL: val = inet->mc_all; break; @@ -1762,45 +1807,9 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int err; if (optname == MCAST_MSFILTER) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter __user *p = (void __user *)optval; - struct compat_group_filter gf32; - struct group_filter gf; - int ulen, err; - int num; - if (level != SOL_IP) return -EOPNOTSUPP; - - if (get_user(ulen, optlen)) - return -EFAULT; - - if (ulen < size0) - return -EINVAL; - - if (copy_from_user(&gf32, p, size0)) - return -EFAULT; - - gf.gf_interface = gf32.gf_interface; - gf.gf_fmode = gf32.gf_fmode; - num = gf.gf_numsrc = gf32.gf_numsrc; - gf.gf_group = gf32.gf_group; - - rtnl_lock(); - lock_sock(sk); - err = ip_mc_gsfget(sk, &gf, p->gf_slist); - release_sock(sk); - rtnl_unlock(); - if (err) - return err; - if (gf.gf_numsrc < num) - num = gf.gf_numsrc; - ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); - if (put_user(ulen, optlen) || - put_user(gf.gf_fmode, &p->gf_fmode) || - put_user(gf.gf_numsrc, &p->gf_numsrc)) - return -EFAULT; - return 0; + return compat_ip_get_mcast_msfilter(sk, optval, optlen); } err = do_ip_getsockopt(sk, level, optname, optval, optlen, -- cgit From d62c38f6a1a8c833df8dd35dacde23b0b6c931f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:24 +0200 Subject: net/ipv4: factor out MCAST_MSFILTER setsockopt helpers Factor out one helper each for setting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 162 ++++++++++++++++++++++++++----------------------- 1 file changed, 86 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 70d32c9476a2..b587dee006f8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -722,6 +722,90 @@ static int do_mcast_group_source(struct sock *sk, int optname, return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); } +static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval, + int optlen) +{ + struct group_filter *gsf = NULL; + int err; + + if (optlen < GROUP_FILTER_SIZE(0)) + return -EINVAL; + if (optlen > sysctl_optmem_max) + return -ENOBUFS; + + gsf = memdup_user(optval, optlen); + if (IS_ERR(gsf)) + return PTR_ERR(gsf); + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + err = -ENOBUFS; + if (gsf->gf_numsrc >= 0x1ffffff || + gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + goto out_free_gsf; + + err = -EINVAL; + if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) + goto out_free_gsf; + + err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, + gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist); +out_free_gsf: + kfree(gsf); + return err; +} + +#ifdef CONFIG_COMPAT +static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval, + int optlen) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + unsigned int n; + void *p; + int err; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + + err = -EFAULT; + if (copy_from_user(gf32, optval, optlen)) + goto out_free_gsf; + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + n = gf32->gf_numsrc; + err = -ENOBUFS; + if (n >= 0x1ffffff) + goto out_free_gsf; + + err = -EINVAL; + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + goto out_free_gsf; + + rtnl_lock(); + lock_sock(sk); + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + err = -ENOBUFS; + if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + goto out_unlock; + err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, + &gf32->gf_group, gf32->gf_slist); +out_unlock: + release_sock(sk); + rtnl_unlock(); +out_free_gsf: + kfree(p); + return err; +} +#endif + static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1167,37 +1251,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } case MCAST_MSFILTER: - { - struct group_filter *gsf = NULL; - - if (optlen < GROUP_FILTER_SIZE(0)) - goto e_inval; - if (optlen > sysctl_optmem_max) { - err = -ENOBUFS; - break; - } - gsf = memdup_user(optval, optlen); - if (IS_ERR(gsf)) { - err = PTR_ERR(gsf); - break; - } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { - err = -EINVAL; - goto mc_msf_out; - } - err = set_mcast_msfilter(sk, gsf->gf_interface, - gsf->gf_numsrc, gsf->gf_fmode, - &gsf->gf_group, gsf->gf_slist); -mc_msf_out: - kfree(gsf); + err = ip_set_mcast_msfilter(sk, optval, optlen); break; - } case IP_MULTICAST_ALL: if (optlen < 1) goto e_inval; @@ -1391,52 +1446,7 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, return err; } case MCAST_MSFILTER: - { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter *gf32; - unsigned int n; - void *p; - - if (optlen < size0) - return -EINVAL; - if (optlen > sysctl_optmem_max - 4) - return -ENOBUFS; - - p = kmalloc(optlen + 4, GFP_KERNEL); - if (!p) - return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ - if (copy_from_user(gf32, optval, optlen)) { - err = -EFAULT; - goto mc_msf_out; - } - - n = gf32->gf_numsrc; - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (n >= 0x1ffffff) { - err = -ENOBUFS; - goto mc_msf_out; - } - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { - err = -EINVAL; - goto mc_msf_out; - } - - rtnl_lock(); - lock_sock(sk); - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) - err = -ENOBUFS; - else - err = set_mcast_msfilter(sk, gf32->gf_interface, - n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); - release_sock(sk); - rtnl_unlock(); -mc_msf_out: - kfree(p); - return err; - } + return compat_ip_set_mcast_msfilter(sk, optval, optlen); } err = do_ip_setsockopt(sk, level, optname, optval, optlen); -- cgit From 02caad7cc0841f94bc105b93fbe468d3cbf6f378 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:25 +0200 Subject: net/ipv4: factor out mcast join/leave setsockopt helpers Factor out one helper each for setting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 109 +++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b587dee006f8..73bb88fbe546 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -806,6 +806,60 @@ out_free_gsf: } #endif +static int ip_mcast_join_leave(struct sock *sk, int optname, + void __user *optval, int optlen) +{ + struct ip_mreqn mreq = { }; + struct sockaddr_in *psin; + struct group_req greq; + + if (optlen < sizeof(struct group_req)) + return -EINVAL; + if (copy_from_user(&greq, optval, sizeof(greq))) + return -EFAULT; + + psin = (struct sockaddr_in *)&greq.gr_group; + if (psin->sin_family != AF_INET) + return -EINVAL; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + if (optname == MCAST_JOIN_GROUP) + return ip_mc_join_group(sk, &mreq); + return ip_mc_leave_group(sk, &mreq); +} + +#ifdef CONFIG_COMPAT +static int compat_ip_mcast_join_leave(struct sock *sk, int optname, + void __user *optval, int optlen) +{ + struct compat_group_req greq; + struct ip_mreqn mreq = { }; + struct sockaddr_in *psin; + int err; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + if (copy_from_user(&greq, optval, sizeof(greq))) + return -EFAULT; + + psin = (struct sockaddr_in *)&greq.gr_group; + if (psin->sin_family != AF_INET) + return -EINVAL; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ip_mc_join_group(sk, &mreq); + else + err = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + rtnl_unlock(); + return err; +} +#endif + static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1211,29 +1265,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - { - struct group_req greq; - struct sockaddr_in *psin; - struct ip_mreqn mreq; - - if (optlen < sizeof(struct group_req)) - goto e_inval; - err = -EFAULT; - if (copy_from_user(&greq, optval, sizeof(greq))) - break; - psin = (struct sockaddr_in *)&greq.gr_group; - if (psin->sin_family != AF_INET) - goto e_inval; - memset(&mreq, 0, sizeof(mreq)); - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_ifindex = greq.gr_interface; - - if (optname == MCAST_JOIN_GROUP) - err = ip_mc_join_group(sk, &mreq); - else - err = ip_mc_leave_group(sk, &mreq); + err = ip_mcast_join_leave(sk, optname, optval, optlen); break; - } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: @@ -1389,37 +1422,7 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req greq; - struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group; - struct ip_mreqn mreq; - - if (optlen < sizeof(struct compat_group_req)) - return -EINVAL; - - if (get_user(greq.gr_interface, &gr32->gr_interface) || - copy_from_user(&greq.gr_group, &gr32->gr_group, - sizeof(greq.gr_group))) - return -EFAULT; - - if (psin->sin_family != AF_INET) - return -EINVAL; - - memset(&mreq, 0, sizeof(mreq)); - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_ifindex = greq.gr_interface; - - rtnl_lock(); - lock_sock(sk); - if (optname == MCAST_JOIN_GROUP) - err = ip_mc_join_group(sk, &mreq); - else - err = ip_mc_leave_group(sk, &mreq); - release_sock(sk); - rtnl_unlock(); - return err; - } + return compat_ip_mcast_join_leave(sk, optname, optval, optlen); case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: -- cgit From b6238c04c0e5dbe7ae4ea48e96e004905b120a04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:26 +0200 Subject: net/ipv4: remove compat_ip_{get,set}sockopt Handle the few cases that need special treatment in-line using in_compat_syscall(). Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 4 - net/ipv4/ip_sockglue.c | 214 ++++++++++++++----------------------------------- net/ipv4/raw.c | 22 ----- net/ipv4/tcp_ipv4.c | 4 - net/ipv4/udp.c | 24 ------ net/ipv4/udp_impl.h | 6 -- net/ipv4/udplite.c | 4 - net/l2tp/l2tp_ip.c | 4 - net/sctp/protocol.c | 4 - 9 files changed, 61 insertions(+), 225 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 316cc5ac0da7..b91373eb1c79 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -913,10 +913,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ip_setsockopt, - .compat_getsockopt = compat_ip_getsockopt, -#endif }; static int dccp_v4_init_sock(struct sock *sk) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 73bb88fbe546..86b3b9a7cea3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -679,20 +679,48 @@ Eaddrnotavail: return -EADDRNOTAVAIL; } +static int copy_group_source_from_user(struct group_source_req *greqs, + void __user *optval, int optlen) +{ + if (in_compat_syscall()) { + struct compat_group_source_req gr32; + + if (optlen != sizeof(gr32)) + return -EINVAL; + if (copy_from_user(&gr32, optval, sizeof(gr32))) + return -EFAULT; + greqs->gsr_interface = gr32.gsr_interface; + greqs->gsr_group = gr32.gsr_group; + greqs->gsr_source = gr32.gsr_source; + } else { + if (optlen != sizeof(*greqs)) + return -EINVAL; + if (copy_from_user(greqs, optval, sizeof(*greqs))) + return -EFAULT; + } + + return 0; +} + static int do_mcast_group_source(struct sock *sk, int optname, - struct group_source_req *greqs) + void __user *optval, int optlen) { + struct group_source_req greqs; struct ip_mreq_source mreqs; struct sockaddr_in *psin; int omode, add, err; - if (greqs->gsr_group.ss_family != AF_INET || - greqs->gsr_source.ss_family != AF_INET) + err = copy_group_source_from_user(&greqs, optval, optlen); + if (err) + return err; + + if (greqs.gsr_group.ss_family != AF_INET || + greqs.gsr_source.ss_family != AF_INET) return -EADDRNOTAVAIL; - psin = (struct sockaddr_in *)&greqs->gsr_group; + psin = (struct sockaddr_in *)&greqs.gsr_group; mreqs.imr_multiaddr = psin->sin_addr.s_addr; - psin = (struct sockaddr_in *)&greqs->gsr_source; + psin = (struct sockaddr_in *)&greqs.gsr_source; mreqs.imr_sourceaddr = psin->sin_addr.s_addr; mreqs.imr_interface = 0; /* use index for mc_source */ @@ -705,21 +733,21 @@ static int do_mcast_group_source(struct sock *sk, int optname, } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct ip_mreqn mreq; - psin = (struct sockaddr_in *)&greqs->gsr_group; + psin = (struct sockaddr_in *)&greqs.gsr_group; mreq.imr_multiaddr = psin->sin_addr; mreq.imr_address.s_addr = 0; - mreq.imr_ifindex = greqs->gsr_interface; + mreq.imr_ifindex = greqs.gsr_interface; err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); if (err && err != -EADDRINUSE) return err; - greqs->gsr_interface = mreq.imr_ifindex; + greqs.gsr_interface = mreq.imr_ifindex; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } - return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); + return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface); } static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval, @@ -754,7 +782,6 @@ out_free_gsf: return err; } -#ifdef CONFIG_COMPAT static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval, int optlen) { @@ -788,23 +815,16 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval, if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) goto out_free_gsf; - rtnl_lock(); - lock_sock(sk); - /* numsrc >= (4G-140)/128 overflow in 32 bits */ err = -ENOBUFS; if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) - goto out_unlock; + goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, &gf32->gf_group, gf32->gf_slist); -out_unlock: - release_sock(sk); - rtnl_unlock(); out_free_gsf: kfree(p); return err; } -#endif static int ip_mcast_join_leave(struct sock *sk, int optname, void __user *optval, int optlen) @@ -828,14 +848,12 @@ static int ip_mcast_join_leave(struct sock *sk, int optname, return ip_mc_leave_group(sk, &mreq); } -#ifdef CONFIG_COMPAT static int compat_ip_mcast_join_leave(struct sock *sk, int optname, void __user *optval, int optlen) { struct compat_group_req greq; struct ip_mreqn mreq = { }; struct sockaddr_in *psin; - int err; if (optlen < sizeof(struct compat_group_req)) return -EINVAL; @@ -848,17 +866,10 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, mreq.imr_multiaddr = psin->sin_addr; mreq.imr_ifindex = greq.gr_interface; - rtnl_lock(); - lock_sock(sk); if (optname == MCAST_JOIN_GROUP) - err = ip_mc_join_group(sk, &mreq); - else - err = ip_mc_leave_group(sk, &mreq); - release_sock(sk); - rtnl_unlock(); - return err; + return ip_mc_join_group(sk, &mreq); + return ip_mc_leave_group(sk, &mreq); } -#endif static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) @@ -1265,26 +1276,23 @@ static int do_ip_setsockopt(struct sock *sk, int level, } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - err = ip_mcast_join_leave(sk, optname, optval, optlen); + if (in_compat_syscall()) + err = compat_ip_mcast_join_leave(sk, optname, optval, + optlen); + else + err = ip_mcast_join_leave(sk, optname, optval, optlen); break; case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: - { - struct group_source_req greqs; - - if (optlen != sizeof(struct group_source_req)) - goto e_inval; - if (copy_from_user(&greqs, optval, sizeof(greqs))) { - err = -EFAULT; - break; - } - err = do_mcast_group_source(sk, optname, &greqs); + err = do_mcast_group_source(sk, optname, optval, optlen); break; - } case MCAST_MSFILTER: - err = ip_set_mcast_msfilter(sk, optval, optlen); + if (in_compat_syscall()) + err = compat_ip_set_mcast_msfilter(sk, optval, optlen); + else + err = ip_set_mcast_msfilter(sk, optval, optlen); break; case IP_MULTICAST_ALL: if (optlen < 1) @@ -1410,62 +1418,6 @@ int ip_setsockopt(struct sock *sk, int level, } EXPORT_SYMBOL(ip_setsockopt); -#ifdef CONFIG_COMPAT -int compat_ip_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - int err; - - if (level != SOL_IP) - return -ENOPROTOOPT; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - return compat_ip_mcast_join_leave(sk, optname, optval, optlen); - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req greqs; - - if (optlen != sizeof(struct compat_group_source_req)) - return -EINVAL; - - if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || - copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, - sizeof(greqs.gsr_group)) || - copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, - sizeof(greqs.gsr_source))) - return -EFAULT; - - rtnl_lock(); - lock_sock(sk); - err = do_mcast_group_source(sk, optname, &greqs); - release_sock(sk); - rtnl_unlock(); - return err; - } - case MCAST_MSFILTER: - return compat_ip_set_mcast_msfilter(sk, optval, optlen); - } - - err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && - optname != IP_XFRM_POLICY && - !ip_mroute_opt(optname)) - err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); -#endif - return err; -} -EXPORT_SYMBOL(compat_ip_setsockopt); -#endif - /* * Get the options. Note for future reference. The GET of IP options gets * the _received_ ones. The set sets the _sent_ ones. @@ -1507,22 +1459,18 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return 0; } -#ifdef CONFIG_COMPAT static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, - int __user *optlen) + int __user *optlen, int len) { const int size0 = offsetof(struct compat_group_filter, gf_slist); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; - int len, err; int num; + int err; - if (get_user(len, optlen)) - return -EFAULT; if (len < size0) return -EINVAL; - if (copy_from_user(&gf32, p, size0)) return -EFAULT; @@ -1531,11 +1479,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; - rtnl_lock(); - lock_sock(sk); err = ip_mc_gsfget(sk, &gf, p->gf_slist); - release_sock(sk); - rtnl_unlock(); if (err) return err; if (gf.gf_numsrc < num) @@ -1547,10 +1491,9 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return -EFAULT; return 0; } -#endif static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen, unsigned int flags) + char __user *optval, int __user *optlen) { struct inet_sock *inet = inet_sk(sk); bool needs_rtnl = getsockopt_needs_rtnl(optname); @@ -1707,7 +1650,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, goto out; } case MCAST_MSFILTER: - err = ip_get_mcast_msfilter(sk, optval, optlen, len); + if (in_compat_syscall()) + err = compat_ip_get_mcast_msfilter(sk, optval, optlen, + len); + else + err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; case IP_MULTICAST_ALL: val = inet->mc_all; @@ -1724,7 +1671,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control_is_user = true; msg.msg_control_user = optval; msg.msg_controllen = len; - msg.msg_flags = flags; + msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1788,45 +1735,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && - !ip_mroute_opt(optname)) { - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - err = nf_getsockopt(sk, PF_INET, optname, optval, &len); - if (err >= 0) - err = put_user(len, optlen); - return err; - } -#endif - return err; -} -EXPORT_SYMBOL(ip_getsockopt); - -#ifdef CONFIG_COMPAT -int compat_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - int err; - - if (optname == MCAST_MSFILTER) { - if (level != SOL_IP) - return -EOPNOTSUPP; - return compat_ip_get_mcast_msfilter(sk, optval, optlen); - } - - err = do_ip_getsockopt(sk, level, optname, optval, optlen, - MSG_CMSG_COMPAT); + err = do_ip_getsockopt(sk, level, optname, optval, optlen); #if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_GET_INFO && @@ -1850,5 +1759,4 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, #endif return err; } -EXPORT_SYMBOL(compat_ip_getsockopt); -#endif +EXPORT_SYMBOL(ip_getsockopt); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 47665919048f..2a57d633b31e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -857,16 +857,6 @@ static int raw_setsockopt(struct sock *sk, int level, int optname, return do_raw_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level != SOL_RAW) - return compat_ip_setsockopt(sk, level, optname, optval, optlen); - return do_raw_setsockopt(sk, level, optname, optval, optlen); -} -#endif - static int do_raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -887,16 +877,6 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, return do_raw_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_raw_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level != SOL_RAW) - return compat_ip_getsockopt(sk, level, optname, optval, optlen); - return do_raw_getsockopt(sk, level, optname, optval, optlen); -} -#endif - static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { @@ -980,8 +960,6 @@ struct proto raw_prot = { .usersize = sizeof_field(struct raw_sock, filter), .h.raw_hash = &raw_v4_hashinfo, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_raw_setsockopt, - .compat_getsockopt = compat_raw_getsockopt, .compat_ioctl = compat_raw_ioctl, #endif .diag_destroy = raw_abort, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 116c11a0aaed..e5b7ef9a2887 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2134,10 +2134,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .getsockopt = ip_getsockopt, .addr2sockaddr = inet_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ip_setsockopt, - .compat_getsockopt = compat_ip_getsockopt, -#endif .mtu_reduced = tcp_v4_mtu_reduced, }; EXPORT_SYMBOL(ipv4_specific); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 073d346f515c..d4be4471c424 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2656,17 +2656,6 @@ int udp_setsockopt(struct sock *sk, int level, int optname, return ip_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return compat_ip_setsockopt(sk, level, optname, optval, optlen); -} -#endif - int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -2732,15 +2721,6 @@ int udp_getsockopt(struct sock *sk, int level, int optname, return ip_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ip_getsockopt(sk, level, optname, optval, optlen); -} -#endif /** * udp_poll - wait for a UDP event. * @file: - file struct @@ -2812,10 +2792,6 @@ struct proto udp_prot = { .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), .h.udp_table = &udp_table, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 6b2fa77eeb1c..ab313702c87f 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -17,12 +17,6 @@ int udp_setsockopt(struct sock *sk, int level, int optname, int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -#ifdef CONFIG_COMPAT -int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -#endif int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 5936d66d1ce2..bd8773b49e72 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -56,10 +56,6 @@ struct proto udplite_prot = { .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index f8d7412cfb3d..2a3fd31fb589 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -612,10 +612,6 @@ static struct proto l2tp_ip_prot = { .hash = l2tp_ip_hash, .unhash = l2tp_ip_unhash, .obj_size = sizeof(struct l2tp_ip_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ip_setsockopt, - .compat_getsockopt = compat_ip_getsockopt, -#endif }; static const struct proto_ops l2tp_ip_ops = { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8d25cc464efd..7ecaf7d575c0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1089,10 +1089,6 @@ static struct sctp_af sctp_af_inet = { .net_header_len = sizeof(struct iphdr), .sockaddr_len = sizeof(struct sockaddr_in), .ip_options_len = sctp_v4_ip_options_len, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ip_setsockopt, - .compat_getsockopt = compat_ip_getsockopt, -#endif }; struct sctp_pf *sctp_get_pf_specific(sa_family_t family) -- cgit From d5541e85cd40118a817f015ce3af1f41a7d7de1b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:27 +0200 Subject: net/ipv6: factor out MCAST_MSFILTER getsockopt helpers Factor out one helper each for getting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 139 +++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 65 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6adfbdcb7979..ef5656f876ac 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1071,6 +1071,77 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, return len; } +static int ipv6_get_msfilter(struct sock *sk, void __user *optval, + int __user *optlen, int len) +{ + const int size0 = offsetof(struct group_filter, gf_slist); + struct group_filter __user *p = optval; + struct group_filter gsf; + int num; + int err; + + if (len < size0) + return -EINVAL; + if (copy_from_user(&gsf, p, size0)) + return -EFAULT; + if (gsf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + num = gsf.gf_numsrc; + lock_sock(sk); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + if (!err) { + if (num > gsf.gf_numsrc) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; + } + release_sock(sk); + return err; +} + +#ifdef CONFIG_COMPAT +static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, + int __user *optlen) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = optval; + struct compat_group_filter gf32; + struct group_filter gf; + int len, err; + int num; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + if (gf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + lock_sock(sk); + err = ip6_mc_msfget(sk, &gf, p->gf_slist); + release_sock(sk); + if (err) + return err; + if (num > gf.gf_numsrc) + num = gf.gf_numsrc; + len = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); + if (put_user(len, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; +} +#endif + static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen, unsigned int flags) { @@ -1094,33 +1165,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = sk->sk_family; break; case MCAST_MSFILTER: - { - struct group_filter __user *p = (void __user *)optval; - struct group_filter gsf; - const int size0 = offsetof(struct group_filter, gf_slist); - int num; - int err; - - if (len < size0) - return -EINVAL; - if (copy_from_user(&gsf, p, size0)) - return -EFAULT; - if (gsf.gf_group.ss_family != AF_INET6) - return -EADDRNOTAVAIL; - num = gsf.gf_numsrc; - lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); - if (!err) { - if (num > gsf.gf_numsrc) - num = gsf.gf_numsrc; - if (put_user(GROUP_FILTER_SIZE(num), optlen) || - copy_to_user(p, &gsf, size0)) - err = -EFAULT; - } - release_sock(sk); - return err; - } - + return ipv6_get_msfilter(sk, optval, optlen, len); case IPV6_2292PKTOPTIONS: { struct msghdr msg; @@ -1481,44 +1526,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname == MCAST_MSFILTER) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter __user *p = (void __user *)optval; - struct compat_group_filter gf32; - struct group_filter gf; - int ulen, err; - int num; - - if (get_user(ulen, optlen)) - return -EFAULT; - - if (ulen < size0) - return -EINVAL; - - if (copy_from_user(&gf32, p, size0)) - return -EFAULT; - - gf.gf_interface = gf32.gf_interface; - gf.gf_fmode = gf32.gf_fmode; - num = gf.gf_numsrc = gf32.gf_numsrc; - gf.gf_group = gf32.gf_group; - - if (gf.gf_group.ss_family != AF_INET6) - return -EADDRNOTAVAIL; - lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); - release_sock(sk); - if (err) - return err; - if (num > gf.gf_numsrc) - num = gf.gf_numsrc; - ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); - if (put_user(ulen, optlen) || - put_user(gf.gf_fmode, &p->gf_fmode) || - put_user(gf.gf_numsrc, &p->gf_numsrc)) - return -EFAULT; - return 0; - } + if (optname == MCAST_MSFILTER) + return compat_ipv6_get_msfilter(sk, optval, optlen); err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); -- cgit From ca0e65eb295411229a8dbdea01b9379e48d2bf39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:28 +0200 Subject: net/ipv6: factor out MCAST_MSFILTER setsockopt helpers Factor out one helper each for setting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 159 +++++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 76 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ef5656f876ac..6aa49495d7bc 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -171,6 +171,87 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname, return ip6_mc_source(add, omode, sk, greqs); } +static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, + int optlen) +{ + struct group_filter *gsf; + int ret; + + if (optlen < GROUP_FILTER_SIZE(0)) + return -EINVAL; + if (optlen > sysctl_optmem_max) + return -ENOBUFS; + + gsf = memdup_user(optval, optlen); + if (IS_ERR(gsf)) + return PTR_ERR(gsf); + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + ret = -ENOBUFS; + if (gsf->gf_numsrc >= 0x1ffffffU || + gsf->gf_numsrc > sysctl_mld_max_msf) + goto out_free_gsf; + + ret = -EINVAL; + if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) + goto out_free_gsf; + + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); +out_free_gsf: + kfree(gsf); + return ret; +} + +#ifdef CONFIG_COMPAT +static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, + int optlen) +{ + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int ret; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + ret = -EFAULT; + if (copy_from_user(gf32, optval, optlen)) + goto out_free_p; + + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + ret = -ENOBUFS; + n = gf32->gf_numsrc; + if (n >= 0x1ffffffU || n > sysctl_mld_max_msf) + goto out_free_p; + + ret = -EINVAL; + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + goto out_free_p; + + rtnl_lock(); + lock_sock(sk); + ret = ip6_mc_msfilter(sk, &(struct group_filter){ + .gf_interface = gf32->gf_interface, + .gf_group = gf32->gf_group, + .gf_fmode = gf32->gf_fmode, + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); + +out_free_p: + kfree(p); + return ret; +} +#endif + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -762,37 +843,8 @@ done: break; } case MCAST_MSFILTER: - { - struct group_filter *gsf; - - if (optlen < GROUP_FILTER_SIZE(0)) - goto e_inval; - if (optlen > sysctl_optmem_max) { - retv = -ENOBUFS; - break; - } - gsf = memdup_user(optval, optlen); - if (IS_ERR(gsf)) { - retv = PTR_ERR(gsf); - break; - } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (gsf->gf_numsrc >= 0x1ffffffU || - gsf->gf_numsrc > sysctl_mld_max_msf) { - kfree(gsf); - retv = -ENOBUFS; - break; - } - if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { - kfree(gsf); - retv = -EINVAL; - break; - } - retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); - kfree(gsf); - + retv = ipv6_set_mcast_msfilter(sk, optval, optlen); break; - } case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; @@ -977,52 +1029,7 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, return err; } case MCAST_MSFILTER: - { - const int size0 = offsetof(struct compat_group_filter, gf_slist); - struct compat_group_filter *gf32; - void *p; - int n; - - if (optlen < size0) - return -EINVAL; - if (optlen > sysctl_optmem_max - 4) - return -ENOBUFS; - - p = kmalloc(optlen + 4, GFP_KERNEL); - if (!p) - return -ENOMEM; - - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ - if (copy_from_user(gf32, optval, optlen)) { - err = -EFAULT; - goto mc_msf_out; - } - - n = gf32->gf_numsrc; - /* numsrc >= (4G-140)/128 overflow in 32 bits */ - if (n >= 0x1ffffffU || - n > sysctl_mld_max_msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { - err = -EINVAL; - goto mc_msf_out; - } - - rtnl_lock(); - lock_sock(sk); - err = ip6_mc_msfilter(sk, &(struct group_filter){ - .gf_interface = gf32->gf_interface, - .gf_group = gf32->gf_group, - .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); - release_sock(sk); - rtnl_unlock(); -mc_msf_out: - kfree(p); - return err; - } + return compat_ipv6_set_mcast_msfilter(sk, optval, optlen); } err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); -- cgit From fdf5bdd87c01571f3256a799bdc217e7cca35fbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:29 +0200 Subject: net/ipv6: factor out mcast join/leave setsockopt helpers Factor out one helper each for setting the native and compat version of the MCAST_MSFILTER option. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 103 ++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6aa49495d7bc..1ea0cd12beae 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -252,6 +252,56 @@ out_free_p: } #endif +static int ipv6_mcast_join_leave(struct sock *sk, int optname, + void __user *optval, int optlen) +{ + struct sockaddr_in6 *psin6; + struct group_req greq; + + if (optlen < sizeof(greq)) + return -EINVAL; + if (copy_from_user(&greq, optval, sizeof(greq))) + return -EFAULT; + + if (greq.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + psin6 = (struct sockaddr_in6 *)&greq.gr_group; + if (optname == MCAST_JOIN_GROUP) + return ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); + return ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); +} + +#ifdef CONFIG_COMPAT +static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, + void __user *optval, int optlen) +{ + struct compat_group_req gr32; + struct sockaddr_in6 *psin6; + int err; + + if (optlen < sizeof(gr32)) + return -EINVAL; + if (copy_from_user(&gr32, optval, sizeof(gr32))) + return -EFAULT; + + if (gr32.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + rtnl_lock(); + lock_sock(sk); + psin6 = (struct sockaddr_in6 *)&gr32.gr_group; + if (optname == MCAST_JOIN_GROUP) + err = ipv6_sock_mc_join(sk, gr32.gr_interface, + &psin6->sin6_addr); + else + err = ipv6_sock_mc_drop(sk, gr32.gr_interface, + &psin6->sin6_addr); + release_sock(sk); + rtnl_unlock(); + return err; +} +#endif + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -803,29 +853,8 @@ done: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - { - struct group_req greq; - struct sockaddr_in6 *psin6; - - if (optlen < sizeof(struct group_req)) - goto e_inval; - - retv = -EFAULT; - if (copy_from_user(&greq, optval, sizeof(struct group_req))) - break; - if (greq.gr_group.ss_family != AF_INET6) { - retv = -EADDRNOTAVAIL; - break; - } - psin6 = (struct sockaddr_in6 *)&greq.gr_group; - if (optname == MCAST_JOIN_GROUP) - retv = ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); - else - retv = ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + retv = ipv6_mcast_join_leave(sk, optname, optval, optlen); break; - } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: @@ -975,34 +1004,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req greq; - struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group; - - if (optlen < sizeof(struct compat_group_req)) - return -EINVAL; - - if (get_user(greq.gr_interface, &gr32->gr_interface) || - copy_from_user(&greq.gr_group, &gr32->gr_group, - sizeof(greq.gr_group))) - return -EFAULT; - - if (greq.gr_group.ss_family != AF_INET6) - return -EADDRNOTAVAIL; - - rtnl_lock(); - lock_sock(sk); - if (optname == MCAST_JOIN_GROUP) - err = ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); - else - err = ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); - release_sock(sk); - rtnl_unlock(); - return err; - } + return compat_ipv6_mcast_join_leave(sk, optname, optval, + optlen); case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: -- cgit From 3021ad529950d07e0408d65d0f1df00454c1d223 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:30 +0200 Subject: net/ipv6: remove compat_ipv6_{get,set}sockopt Handle the few cases that need special treatment in-line using in_compat_syscall(). This also removes all the now unused compat_{get,set}sockopt methods. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/sock.c | 10 --- net/dccp/dccp.h | 6 -- net/dccp/ipv4.c | 4 - net/dccp/ipv6.c | 12 --- net/dccp/proto.c | 26 ------ net/ipv4/inet_connection_sock.c | 28 ------ net/ipv4/tcp.c | 24 ------ net/ipv4/tcp_ipv4.c | 4 - net/ipv6/ipv6_sockglue.c | 183 +++++++++++----------------------------- net/ipv6/raw.c | 50 ----------- net/ipv6/tcp_ipv6.c | 12 --- net/ipv6/udp.c | 25 ------ net/ipv6/udp_impl.h | 6 -- net/ipv6/udplite.c | 4 - net/l2tp/l2tp_ip6.c | 4 - net/sctp/ipv6.c | 4 - 16 files changed, 51 insertions(+), 351 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 018404d17626..48655d5c4cf3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3199,11 +3199,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; -#ifdef CONFIG_COMPAT - if (in_compat_syscal() && sk->sk_prot->compat_getsockopt) - return sk->sk_prot->compat_getsockopt(sk, level, optname, - optval, optlen); -#endif return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); @@ -3231,11 +3226,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; -#ifdef CONFIG_COMPAT - if (in_compat_syscall() && sk->sk_prot->compat_setsockopt) - return sk->sk_prot->compat_setsockopt(sk, level, optname, - optval, optlen); -#endif return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 7dce4f6c7025..434eea91b767 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -296,12 +296,6 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); -#ifdef CONFIG_COMPAT -int compat_dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int compat_dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -#endif int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b91373eb1c79..9c28c8251125 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -959,10 +959,6 @@ static struct proto dccp_v4_prot = { .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_dccp_setsockopt, - .compat_getsockopt = compat_dccp_getsockopt, -#endif }; static const struct net_protocol dccp_v4_protocol = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b50f85a72cd5..ef4ab28cfde0 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,10 +970,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif }; /* @@ -990,10 +986,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif }; /* NOTE: A lot of things set to zero explicitly by call to @@ -1049,10 +1041,6 @@ static struct proto dccp_v6_prot = { .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_dccp_setsockopt, - .compat_getsockopt = compat_dccp_getsockopt, -#endif }; static const struct inet6_protocol dccp_v6_protocol = { diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c13b6609474b..fd92d3fe321f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -575,19 +575,6 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(dccp_setsockopt); -#ifdef CONFIG_COMPAT -int compat_dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level != SOL_DCCP) - return inet_csk_compat_setsockopt(sk, level, optname, - optval, optlen); - return do_dccp_setsockopt(sk, level, optname, optval, optlen); -} - -EXPORT_SYMBOL_GPL(compat_dccp_setsockopt); -#endif - static int dccp_getsockopt_service(struct sock *sk, int len, __be32 __user *optval, int __user *optlen) @@ -696,19 +683,6 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(dccp_getsockopt); -#ifdef CONFIG_COMPAT -int compat_dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level != SOL_DCCP) - return inet_csk_compat_getsockopt(sk, level, optname, - optval, optlen); - return do_dccp_getsockopt(sk, level, optname, optval, optlen); -} - -EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); -#endif - static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) { struct cmsghdr *cmsg; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 22b0e7336360..d1a3913eebe0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1057,34 +1057,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) } EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); -#ifdef CONFIG_COMPAT -int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_af_ops->compat_getsockopt) - return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname, - optval, optlen); - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); -} -EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); - -int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - - if (icsk->icsk_af_ops->compat_setsockopt) - return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname, - optval, optlen); - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); -} -EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); -#endif - static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 254b6a4cc95b..58ede3d62b2e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3337,18 +3337,6 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } EXPORT_SYMBOL(tcp_setsockopt); -#ifdef CONFIG_COMPAT -int compat_tcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level != SOL_TCP) - return inet_csk_compat_setsockopt(sk, level, optname, - optval, optlen); - return do_tcp_setsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(compat_tcp_setsockopt); -#endif - static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) { @@ -3896,18 +3884,6 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, } EXPORT_SYMBOL(tcp_getsockopt); -#ifdef CONFIG_COMPAT -int compat_tcp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level != SOL_TCP) - return inet_csk_compat_getsockopt(sk, level, optname, - optval, optlen); - return do_tcp_getsockopt(sk, level, optname, optval, optlen); -} -EXPORT_SYMBOL(compat_tcp_getsockopt); -#endif - #ifdef CONFIG_TCP_MD5SIG static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e5b7ef9a2887..cd81b6e04efb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2769,10 +2769,6 @@ struct proto tcp_prot = { .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, .no_autobind = true, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_tcp_setsockopt, - .compat_getsockopt = compat_tcp_getsockopt, -#endif .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1ea0cd12beae..add8f7912299 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,13 +136,42 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int copy_group_source_from_user(struct group_source_req *greqs, + void __user *optval, int optlen) +{ + if (in_compat_syscall()) { + struct compat_group_source_req gr32; + + if (optlen < sizeof(gr32)) + return -EINVAL; + if (copy_from_user(&gr32, optval, sizeof(gr32))) + return -EFAULT; + greqs->gsr_interface = gr32.gsr_interface; + greqs->gsr_group = gr32.gsr_group; + greqs->gsr_source = gr32.gsr_source; + } else { + if (optlen < sizeof(*greqs)) + return -EINVAL; + if (copy_from_user(greqs, optval, sizeof(*greqs))) + return -EFAULT; + } + + return 0; +} + static int do_ipv6_mcast_group_source(struct sock *sk, int optname, - struct group_source_req *greqs) + void __user *optval, int optlen) { + struct group_source_req greqs; int omode, add; + int ret; + + ret = copy_group_source_from_user(&greqs, optval, optlen); + if (ret) + return ret; - if (greqs->gsr_group.ss_family != AF_INET6 || - greqs->gsr_source.ss_family != AF_INET6) + if (greqs.gsr_group.ss_family != AF_INET6 || + greqs.gsr_source.ss_family != AF_INET6) return -EADDRNOTAVAIL; if (optname == MCAST_BLOCK_SOURCE) { @@ -155,8 +184,8 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname, struct sockaddr_in6 *psin6; int retv; - psin6 = (struct sockaddr_in6 *)&greqs->gsr_group; - retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface, + psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; + retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, &psin6->sin6_addr, MCAST_INCLUDE); /* prior join w/ different source is ok */ @@ -168,7 +197,7 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname, omode = MCAST_INCLUDE; add = 0; } - return ip6_mc_source(add, omode, sk, greqs); + return ip6_mc_source(add, omode, sk, &greqs); } static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, @@ -202,7 +231,6 @@ out_free_gsf: return ret; } -#ifdef CONFIG_COMPAT static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, int optlen) { @@ -236,21 +264,16 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) goto out_free_p; - rtnl_lock(); - lock_sock(sk); ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); - release_sock(sk); - rtnl_unlock(); out_free_p: kfree(p); return ret; } -#endif static int ipv6_mcast_join_leave(struct sock *sk, int optname, void __user *optval, int optlen) @@ -272,13 +295,11 @@ static int ipv6_mcast_join_leave(struct sock *sk, int optname, return ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); } -#ifdef CONFIG_COMPAT static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, void __user *optval, int optlen) { struct compat_group_req gr32; struct sockaddr_in6 *psin6; - int err; if (optlen < sizeof(gr32)) return -EINVAL; @@ -287,20 +308,12 @@ static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, if (gr32.gr_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; - rtnl_lock(); - lock_sock(sk); psin6 = (struct sockaddr_in6 *)&gr32.gr_group; if (optname == MCAST_JOIN_GROUP) - err = ipv6_sock_mc_join(sk, gr32.gr_interface, + return ipv6_sock_mc_join(sk, gr32.gr_interface, &psin6->sin6_addr); - else - err = ipv6_sock_mc_drop(sk, gr32.gr_interface, - &psin6->sin6_addr); - release_sock(sk); - rtnl_unlock(); - return err; + return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr); } -#endif static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) @@ -853,26 +866,25 @@ done: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: - retv = ipv6_mcast_join_leave(sk, optname, optval, optlen); + if (in_compat_syscall()) + retv = compat_ipv6_mcast_join_leave(sk, optname, optval, + optlen); + else + retv = ipv6_mcast_join_leave(sk, optname, optval, + optlen); break; case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: - { - struct group_source_req greqs; - - if (optlen < sizeof(struct group_source_req)) - goto e_inval; - if (copy_from_user(&greqs, optval, sizeof(greqs))) { - retv = -EFAULT; - break; - } - retv = do_ipv6_mcast_group_source(sk, optname, &greqs); + retv = do_ipv6_mcast_group_source(sk, optname, optval, optlen); break; - } case MCAST_MSFILTER: - retv = ipv6_set_mcast_msfilter(sk, optval, optlen); + if (in_compat_syscall()) + retv = compat_ipv6_set_mcast_msfilter(sk, optval, + optlen); + else + retv = ipv6_set_mcast_msfilter(sk, optval, optlen); break; case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) @@ -989,64 +1001,6 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL(ipv6_setsockopt); -#ifdef CONFIG_COMPAT -int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - int err; - - if (level == SOL_IP && sk->sk_type != SOCK_RAW) - return udp_prot.setsockopt(sk, level, optname, optval, optlen); - - if (level != SOL_IPV6) - return -ENOPROTOOPT; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - return compat_ipv6_mcast_join_leave(sk, optname, optval, - optlen); - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req greqs; - - if (optlen < sizeof(struct compat_group_source_req)) - return -EINVAL; - - if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || - copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, - sizeof(greqs.gsr_group)) || - copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, - sizeof(greqs.gsr_source))) - return -EFAULT; - - rtnl_lock(); - lock_sock(sk); - err = do_ipv6_mcast_group_source(sk, optname, &greqs); - release_sock(sk); - rtnl_unlock(); - return err; - } - case MCAST_MSFILTER: - return compat_ipv6_set_mcast_msfilter(sk, optval, optlen); - } - - err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && - optname != IPV6_XFRM_POLICY) - err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); -#endif - return err; -} -EXPORT_SYMBOL(compat_ipv6_setsockopt); -#endif - static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, int optname, char __user *optval, int len) { @@ -1110,7 +1064,6 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return err; } -#ifdef CONFIG_COMPAT static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { @@ -1150,7 +1103,6 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EFAULT; return 0; } -#endif static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen, unsigned int flags) @@ -1175,6 +1127,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = sk->sk_family; break; case MCAST_MSFILTER: + if (in_compat_syscall()) + return compat_ipv6_get_msfilter(sk, optval, optlen); return ipv6_get_msfilter(sk, optval, optlen, len); case IPV6_2292PKTOPTIONS: { @@ -1523,38 +1477,3 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, return err; } EXPORT_SYMBOL(ipv6_getsockopt); - -#ifdef CONFIG_COMPAT -int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - int err; - - if (level == SOL_IP && sk->sk_type != SOCK_RAW) - return udp_prot.getsockopt(sk, level, optname, optval, optlen); - - if (level != SOL_IPV6) - return -ENOPROTOOPT; - - if (optname == MCAST_MSFILTER) - return compat_ipv6_get_msfilter(sk, optval, optlen); - - err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, - MSG_CMSG_COMPAT); -#ifdef CONFIG_NETFILTER - /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); - if (err >= 0) - err = put_user(len, optlen); - } -#endif - return err; -} -EXPORT_SYMBOL(compat_ipv6_getsockopt); -#endif diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e23c6b461758..594e01ad670a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1084,30 +1084,6 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, return do_rawv6_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - switch (level) { - case SOL_RAW: - break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM || - optname == IPV6_HDRINCL) - break; - fallthrough; - default: - return compat_ipv6_setsockopt(sk, level, optname, - optval, optlen); - } - return do_rawv6_setsockopt(sk, level, optname, optval, optlen); -} -#endif - static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -1169,30 +1145,6 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, return do_rawv6_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - switch (level) { - case SOL_RAW: - break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM || - optname == IPV6_HDRINCL) - break; - fallthrough; - default: - return compat_ipv6_getsockopt(sk, level, optname, - optval, optlen); - } - return do_rawv6_getsockopt(sk, level, optname, optval, optlen); -} -#endif - static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { @@ -1297,8 +1249,6 @@ struct proto rawv6_prot = { .usersize = sizeof_field(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT - .compat_setsockopt = compat_rawv6_setsockopt, - .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif .diag_destroy = raw_abort, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4502db706f75..c34b7834fd84 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1831,10 +1831,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif .mtu_reduced = tcp_v6_mtu_reduced, }; @@ -1861,10 +1857,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif .mtu_reduced = tcp_v4_mtu_reduced, }; @@ -2122,10 +2114,6 @@ struct proto tcpv6_prot = { .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, .no_autobind = true, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_tcp_setsockopt, - .compat_getsockopt = compat_tcp_getsockopt, -#endif .diag_destroy = tcp_abort, }; EXPORT_SYMBOL_GPL(tcpv6_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 38c0d9350c6b..5aff0856a05b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1570,17 +1570,6 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, return ipv6_setsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_v6_push_pending_frames); - return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); -} -#endif - int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -1589,16 +1578,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, return ipv6_getsockopt(sk, level, optname, optval, optlen); } -#ifdef CONFIG_COMPAT -int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); -} -#endif - /* thinking of making this const? Don't. * early_demux can change based on sysctl. */ @@ -1681,10 +1660,6 @@ struct proto udpv6_prot = { .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udp_table, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, -#endif .diag_destroy = udp_abort, }; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 20e324b6f358..30dfb6f1b762 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -19,12 +19,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); -#ifdef CONFIG_COMPAT -int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -#endif int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index bf7a7acd39b1..fbb700d3f437 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -52,10 +52,6 @@ struct proto udplitev6_prot = { .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udpv6_setsockopt, - .compat_getsockopt = compat_udpv6_getsockopt, -#endif }; static struct inet_protosw udplite6_protosw = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 2cdc0b7a7a43..4799bec87b33 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -745,10 +745,6 @@ static struct proto l2tp_ip6_prot = { .hash = l2tp_ip6_hash, .unhash = l2tp_ip6_unhash, .obj_size = sizeof(struct l2tp_ip6_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif }; static const struct proto_ops l2tp_ip6_ops = { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ebda31b7747d..aea2a982984d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1087,10 +1087,6 @@ static struct sctp_af sctp_af_inet6 = { .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), .ip_options_len = sctp_v6_ip_options_len, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_ipv6_setsockopt, - .compat_getsockopt = compat_ipv6_getsockopt, -#endif }; static struct sctp_pf sctp_pf_inet6 = { -- cgit From a44d9e72100f7044ac46e4e6dc475f5b4097830f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:31 +0200 Subject: net: make ->{get,set}sockopt in proto_ops optional Just check for a NULL method instead of wiring up sock_no_{get,set}sockopt. Signed-off-by: Christoph Hellwig Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 2 -- net/bluetooth/bnep/sock.c | 2 -- net/bluetooth/cmtp/sock.c | 2 -- net/bluetooth/hidp/sock.c | 2 -- net/caif/caif_socket.c | 2 -- net/can/bcm.c | 2 -- net/core/sock.c | 14 -------------- net/key/af_key.c | 2 -- net/nfc/llcp_sock.c | 2 -- net/nfc/rawsock.c | 4 ---- net/packet/af_packet.c | 2 -- net/phonet/socket.c | 2 -- net/qrtr/qrtr.c | 2 -- net/smc/af_smc.c | 9 +++++++-- net/socket.c | 4 ++++ net/unix/af_unix.c | 6 ------ net/vmw_vsock/af_vsock.c | 2 -- 17 files changed, 11 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 15787e8c0629..1d48708c5a2e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1917,8 +1917,6 @@ static const struct proto_ops atalk_dgram_ops = { #endif .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = atalk_sendmsg, .recvmsg = atalk_recvmsg, .mmap = sock_no_mmap, diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index cfd83c5521ae..d515571b2afb 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -182,8 +182,6 @@ static const struct proto_ops bnep_sock_ops = { .recvmsg = sock_no_recvmsg, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index defdd4871919..96d49d9fae96 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -185,8 +185,6 @@ static const struct proto_ops cmtp_sock_ops = { .recvmsg = sock_no_recvmsg, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 03be6a4baef3..595fb3c9d6c3 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -233,8 +233,6 @@ static const struct proto_ops hidp_sock_ops = { .recvmsg = sock_no_recvmsg, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index ef14da50a981..b94ecd931002 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -981,7 +981,6 @@ static const struct proto_ops caif_seqpacket_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = caif_seqpkt_sendmsg, .recvmsg = caif_seqpkt_recvmsg, .mmap = sock_no_mmap, @@ -1002,7 +1001,6 @@ static const struct proto_ops caif_stream_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = caif_stream_sendmsg, .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, diff --git a/net/can/bcm.c b/net/can/bcm.c index c96fa0f33db3..d14ea12affb1 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1648,8 +1648,6 @@ static const struct proto_ops bcm_ops = { .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, diff --git a/net/core/sock.c b/net/core/sock.c index 48655d5c4cf3..d828bfe1c47d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2783,20 +2783,6 @@ int sock_no_shutdown(struct socket *sock, int how) } EXPORT_SYMBOL(sock_no_shutdown); -int sock_no_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(sock_no_setsockopt); - -int sock_no_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(sock_no_getsockopt); - int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; diff --git a/net/key/af_key.c b/net/key/af_key.c index b67ed3a8486c..f13626c1a985 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3734,8 +3734,6 @@ static const struct proto_ops pfkey_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 28604414dec1..6da1e2334bb6 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -921,8 +921,6 @@ static const struct proto_ops llcp_rawsock_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = sock_no_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index ba5ffd3badd3..b2061b6746ea 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -276,8 +276,6 @@ static const struct proto_ops rawsock_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = rawsock_sendmsg, .recvmsg = rawsock_recvmsg, .mmap = sock_no_mmap, @@ -296,8 +294,6 @@ static const struct proto_ops rawsock_raw_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = sock_no_sendmsg, .recvmsg = rawsock_recvmsg, .mmap = sock_no_mmap, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 35aee9e98053..c240fb5de3f0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4503,8 +4503,6 @@ static const struct proto_ops packet_ops_spkt = { .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 87c60f83c180..2599235d592e 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -439,8 +439,6 @@ const struct proto_ops phonet_dgram_ops = { .ioctl = pn_socket_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 24a8c3c6da0d..0cb4adfc6641 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1208,8 +1208,6 @@ static const struct proto_ops qrtr_proto_ops = { .gettstamp = sock_gettstamp, .poll = datagram_poll, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .release = qrtr_release, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 903321543838..9711c9e0e515 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1742,8 +1742,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ - rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, - optval, optlen); + if (unlikely(!smc->clcsock->ops->setsockopt)) + rc = -EOPNOTSUPP; + else + rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, + optval, optlen); if (smc->clcsock->sk->sk_err) { sk->sk_err = smc->clcsock->sk->sk_err; sk->sk_error_report(sk); @@ -1808,6 +1811,8 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, smc = smc_sk(sock->sk); /* socket options apply to the CLC socket */ + if (unlikely(!smc->clcsock->ops->getsockopt)) + return -EOPNOTSUPP; return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, optval, optlen); } diff --git a/net/socket.c b/net/socket.c index dec345982abb..93846568c2fb 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2131,6 +2131,8 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval, if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) err = sock_setsockopt(sock, level, optname, optval, optlen); + else if (unlikely(!sock->ops->setsockopt)) + err = -EOPNOTSUPP; else err = sock->ops->setsockopt(sock, level, optname, optval, optlen); @@ -2175,6 +2177,8 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval, if (level == SOL_SOCKET) err = sock_getsockopt(sock, level, optname, optval, optlen); + else if (unlikely(!sock->ops->getsockopt)) + err = -EOPNOTSUPP; else err = sock->ops->getsockopt(sock, level, optname, optval, optlen); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3385a7a0b231..181ea6fb56a6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -714,8 +714,6 @@ static const struct proto_ops unix_stream_ops = { #endif .listen = unix_listen, .shutdown = unix_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, @@ -741,8 +739,6 @@ static const struct proto_ops unix_dgram_ops = { #endif .listen = sock_no_listen, .shutdown = unix_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = unix_dgram_sendmsg, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, @@ -767,8 +763,6 @@ static const struct proto_ops unix_seqpacket_ops = { #endif .listen = unix_listen, .shutdown = unix_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 626bf9044418..df204c6761c4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1202,8 +1202,6 @@ static const struct proto_ops vsock_dgram_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, .sendmsg = vsock_dgram_sendmsg, .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, -- cgit From ca84bd058daefef1d184dd0a7b6b43c67339e72c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:38 +0200 Subject: sctp: copy the optval from user space in sctp_setsockopt Prepare for for moving the copy_from_user from the individual sockopts to the main setsockopt helper. As of this commit the kopt variable is not used yet, but the following commits will start using it. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d57e1a002ffc..af1ebc8313d3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4677,6 +4677,7 @@ out: static int sctp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { + void *kopt = NULL; int retval = 0; pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); @@ -4693,6 +4694,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, goto out_nounlock; } + if (optlen > 0) { + kopt = memdup_user(optval, optlen); + if (IS_ERR(kopt)) + return PTR_ERR(kopt); + } + lock_sock(sk); switch (optname) { @@ -4878,6 +4885,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, } release_sock(sk); + kfree(kopt); out_nounlock: return retval; -- cgit From 8c7517f54c8f24f3633383e8b552c1858fe95389 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:39 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_bindx Rename sctp_setsockopt_bindx_kernel back to sctp_setsockopt_bindx, and use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer in the old sctp_setsockopt_bindx. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index af1ebc8313d3..85ba5155b177 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -979,9 +979,8 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx_kernel(struct sock *sk, - struct sockaddr *addrs, int addrs_size, - int op) +static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *addrs, + int addrs_size, int op) { int err; int addrcnt = 0; @@ -991,7 +990,7 @@ static int sctp_setsockopt_bindx_kernel(struct sock *sk, struct sctp_af *af; pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", - __func__, sk, addrs, addrs_size, op); + __func__, sk, addr_buf, addrs_size, op); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1037,29 +1036,13 @@ static int sctp_setsockopt_bindx_kernel(struct sock *sk, } } -static int sctp_setsockopt_bindx(struct sock *sk, - struct sockaddr __user *addrs, - int addrs_size, int op) -{ - struct sockaddr *kaddrs; - int err; - - kaddrs = memdup_user(addrs, addrs_size); - if (IS_ERR(kaddrs)) - return PTR_ERR(kaddrs); - err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op); - kfree(kaddrs); - return err; -} - static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs, int addrlen) { int err; lock_sock(sk); - err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen, - SCTP_BINDX_ADD_ADDR); + err = sctp_setsockopt_bindx(sk, addrs, addrlen, SCTP_BINDX_ADD_ADDR); release_sock(sk); return err; } @@ -4705,14 +4688,14 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SCTP_SOCKOPT_BINDX_ADD: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval, - optlen, SCTP_BINDX_ADD_ADDR); + retval = sctp_setsockopt_bindx(sk, kopt, optlen, + SCTP_BINDX_ADD_ADDR); break; case SCTP_SOCKOPT_BINDX_REM: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_bindx(sk, (struct sockaddr __user *)optval, - optlen, SCTP_BINDX_REM_ADDR); + retval = sctp_setsockopt_bindx(sk, kopt, optlen, + SCTP_BINDX_REM_ADDR); break; case SCTP_SOCKOPT_CONNECTX_OLD: -- cgit From ce5b2f8929df3ecac3cea44371637241ba816827 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:40 +0200 Subject: sctp: pass a kernel pointer to __sctp_setsockopt_connectx Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 50 +++++++++++++++++++------------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 85ba5155b177..44cf2848146a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1286,36 +1286,29 @@ out_free: * it. * * sk The sk of the socket - * addrs The pointer to the addresses in user land + * addrs The pointer to the addresses * addrssize Size of the addrs buffer * * Returns >=0 if ok, <0 errno code on error. */ -static int __sctp_setsockopt_connectx(struct sock *sk, - struct sockaddr __user *addrs, - int addrs_size, - sctp_assoc_t *assoc_id) +static int __sctp_setsockopt_connectx(struct sock *sk, struct sockaddr *kaddrs, + int addrs_size, sctp_assoc_t *assoc_id) { - struct sockaddr *kaddrs; int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", - __func__, sk, addrs, addrs_size); + __func__, sk, kaddrs, addrs_size); /* make sure the 1st addr's sa_family is accessible later */ if (unlikely(addrs_size < sizeof(sa_family_t))) return -EINVAL; - kaddrs = memdup_user(addrs, addrs_size); - if (IS_ERR(kaddrs)) - return PTR_ERR(kaddrs); - /* Allow security module to validate connectx addresses. */ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_CONNECTX, (struct sockaddr *)kaddrs, addrs_size); if (err) - goto out_free; + return err; /* in-kernel sockets don't generally have a file allocated to them * if all they do is call sock_create_kern(). @@ -1323,12 +1316,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (sk->sk_socket->file) flags = sk->sk_socket->file->f_flags; - err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); - -out_free: - kfree(kaddrs); - - return err; + return __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); } /* @@ -1336,10 +1324,10 @@ out_free: * to the option that doesn't provide association id. */ static int sctp_setsockopt_connectx_old(struct sock *sk, - struct sockaddr __user *addrs, + struct sockaddr *kaddrs, int addrs_size) { - return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); + return __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, NULL); } /* @@ -1349,13 +1337,13 @@ static int sctp_setsockopt_connectx_old(struct sock *sk, * always positive. */ static int sctp_setsockopt_connectx(struct sock *sk, - struct sockaddr __user *addrs, + struct sockaddr *kaddrs, int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; - err = __sctp_setsockopt_connectx(sk, addrs, addrs_size, &assoc_id); + err = __sctp_setsockopt_connectx(sk, kaddrs, addrs_size, &assoc_id); if (err) return err; @@ -1385,6 +1373,7 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len, { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; + struct sockaddr *kaddrs; int err = 0; #ifdef CONFIG_COMPAT @@ -1408,9 +1397,12 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len, return -EFAULT; } - err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *) - param.addrs, param.addr_num, - &assoc_id); + kaddrs = memdup_user(param.addrs, param.addr_num); + if (IS_ERR(kaddrs)) + return PTR_ERR(kaddrs); + + err = __sctp_setsockopt_connectx(sk, kaddrs, param.addr_num, &assoc_id); + kfree(kaddrs); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) return -EFAULT; @@ -4700,16 +4692,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_SOCKOPT_CONNECTX_OLD: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_connectx_old(sk, - (struct sockaddr __user *)optval, - optlen); + retval = sctp_setsockopt_connectx_old(sk, kopt, optlen); break; case SCTP_SOCKOPT_CONNECTX: /* 'optlen' is the size of the addresses buffer. */ - retval = sctp_setsockopt_connectx(sk, - (struct sockaddr __user *)optval, - optlen); + retval = sctp_setsockopt_connectx(sk, kopt, optlen); break; case SCTP_DISABLE_FRAGMENTS: -- cgit From 1083582558c3bdda87ae13cc6728e17d583b0b74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:41 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_disable_fragments Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 44cf2848146a..b259ea94aedd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2184,20 +2184,12 @@ out: * exceeds the current PMTU size, the message will NOT be sent and * instead a error will be indicated to the user. */ -static int sctp_setsockopt_disable_fragments(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_disable_fragments(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (optlen < sizeof(int)) return -EINVAL; - - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - sctp_sk(sk)->disable_fragments = (val == 0) ? 0 : 1; - + sctp_sk(sk)->disable_fragments = (*val == 0) ? 0 : 1; return 0; } @@ -4701,7 +4693,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_DISABLE_FRAGMENTS: - retval = sctp_setsockopt_disable_fragments(sk, optval, optlen); + retval = sctp_setsockopt_disable_fragments(sk, kopt, optlen); break; case SCTP_EVENTS: -- cgit From a98d21a173d1b6245310e90bbf60ad17125dadf3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:42 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_events Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b259ea94aedd..bc37174bd71a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2193,11 +2193,9 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk, int *val, return 0; } -static int sctp_setsockopt_events(struct sock *sk, char __user *optval, +static int sctp_setsockopt_events(struct sock *sk, __u8 *sn_type, unsigned int optlen) { - struct sctp_event_subscribe subscribe; - __u8 *sn_type = (__u8 *)&subscribe; struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; int i; @@ -2205,9 +2203,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; - if (copy_from_user(&subscribe, optval, optlen)) - return -EFAULT; - for (i = 0; i < optlen; i++) sctp_ulpevent_type_set(&sp->subscribe, SCTP_SN_TYPE_BASE + i, sn_type[i]); @@ -4697,7 +4692,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_EVENTS: - retval = sctp_setsockopt_events(sk, optval, optlen); + retval = sctp_setsockopt_events(sk, kopt, optlen); break; case SCTP_AUTOCLOSE: -- cgit From 0b49a65c77d8a4864e1d524dc51c6a4b8a6e1585 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:43 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_autoclose Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bc37174bd71a..d7ed8c6ea375 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2242,7 +2242,7 @@ static int sctp_setsockopt_events(struct sock *sk, __u8 *sn_type, * integer defining the number of seconds of idle time before an * association is closed. */ -static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, +static int sctp_setsockopt_autoclose(struct sock *sk, u32 *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); @@ -2253,9 +2253,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EOPNOTSUPP; if (optlen != sizeof(int)) return -EINVAL; - if (copy_from_user(&sp->autoclose, optval, optlen)) - return -EFAULT; + sp->autoclose = *optval; if (sp->autoclose > net->sctp.max_autoclose) sp->autoclose = net->sctp.max_autoclose; @@ -4696,7 +4695,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_AUTOCLOSE: - retval = sctp_setsockopt_autoclose(sk, optval, optlen); + retval = sctp_setsockopt_autoclose(sk, kopt, optlen); break; case SCTP_PEER_ADDR_PARAMS: -- cgit From 9b7b0d1a395d54c12be9f18d1bf7be06aecaa785 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:44 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_peer_addr_params Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d7ed8c6ea375..19308c327b6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2589,48 +2589,42 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, } static int sctp_setsockopt_peer_addr_params(struct sock *sk, - char __user *optval, + struct sctp_paddrparams *params, unsigned int optlen) { - struct sctp_paddrparams params; struct sctp_transport *trans = NULL; struct sctp_association *asoc = NULL; struct sctp_sock *sp = sctp_sk(sk); int error; int hb_change, pmtud_change, sackdelay_change; - if (optlen == sizeof(params)) { - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - } else if (optlen == ALIGN(offsetof(struct sctp_paddrparams, + if (optlen == ALIGN(offsetof(struct sctp_paddrparams, spp_ipv6_flowlabel), 4)) { - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL)) + if (params->spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL)) return -EINVAL; - } else { + } else if (optlen != sizeof(*params)) { return -EINVAL; } /* Validate flags and value parameters. */ - hb_change = params.spp_flags & SPP_HB; - pmtud_change = params.spp_flags & SPP_PMTUD; - sackdelay_change = params.spp_flags & SPP_SACKDELAY; + hb_change = params->spp_flags & SPP_HB; + pmtud_change = params->spp_flags & SPP_PMTUD; + sackdelay_change = params->spp_flags & SPP_SACKDELAY; if (hb_change == SPP_HB || pmtud_change == SPP_PMTUD || sackdelay_change == SPP_SACKDELAY || - params.spp_sackdelay > 500 || - (params.spp_pathmtu && - params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + params->spp_sackdelay > 500 || + (params->spp_pathmtu && + params->spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) return -EINVAL; /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(sk, (union sctp_addr *)¶ms.spp_address)) { - trans = sctp_addr_id2transport(sk, ¶ms.spp_address, - params.spp_assoc_id); + if (!sctp_is_any(sk, (union sctp_addr *)¶ms->spp_address)) { + trans = sctp_addr_id2transport(sk, ¶ms->spp_address, + params->spp_assoc_id); if (!trans) return -EINVAL; } @@ -2639,19 +2633,19 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ - asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->spp_assoc_id); + if (!asoc && params->spp_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Heartbeat demand can only be sent on a transport or * association, but not a socket. */ - if (params.spp_flags & SPP_HB_DEMAND && !trans && !asoc) + if (params->spp_flags & SPP_HB_DEMAND && !trans && !asoc) return -EINVAL; /* Process parameters. */ - error = sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + error = sctp_apply_peer_addr_params(params, trans, asoc, sp, hb_change, pmtud_change, sackdelay_change); @@ -2664,7 +2658,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, if (!trans && asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { - sctp_apply_peer_addr_params(¶ms, trans, asoc, sp, + sctp_apply_peer_addr_params(params, trans, asoc, sp, hb_change, pmtud_change, sackdelay_change); } @@ -4699,7 +4693,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_PEER_ADDR_PARAMS: - retval = sctp_setsockopt_peer_addr_params(sk, optval, optlen); + retval = sctp_setsockopt_peer_addr_params(sk, kopt, optlen); break; case SCTP_DELAYED_SACK: -- cgit From ebb25defdc17b594715418f1aa99eeb9a217cf1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:45 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_delayed_ack Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 19308c327b6d..2dfbd7f87991 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2751,17 +2751,14 @@ static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, */ static int sctp_setsockopt_delayed_ack(struct sock *sk, - char __user *optval, unsigned int optlen) + struct sctp_sack_info *params, + unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_sack_info params; if (optlen == sizeof(struct sctp_sack_info)) { - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - - if (params.sack_delay == 0 && params.sack_freq == 0) + if (params->sack_delay == 0 && params->sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { pr_warn_ratelimited(DEPRECATED @@ -2769,59 +2766,57 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, "Use of struct sctp_assoc_value in delayed_ack socket option.\n" "Use struct sctp_sack_info instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - if (params.sack_delay == 0) - params.sack_freq = 1; + if (params->sack_delay == 0) + params->sack_freq = 1; else - params.sack_freq = 0; + params->sack_freq = 0; } else return -EINVAL; /* Validate value parameter. */ - if (params.sack_delay > 500) + if (params->sack_delay > 500) return -EINVAL; /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the * socket is a one to many style socket, and an association * was not found, then the id was invalid. */ - asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, params->sack_assoc_id); + if (!asoc && params->sack_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { - sctp_apply_asoc_delayed_ack(¶ms, asoc); + sctp_apply_asoc_delayed_ack(params, asoc); return 0; } if (sctp_style(sk, TCP)) - params.sack_assoc_id = SCTP_FUTURE_ASSOC; + params->sack_assoc_id = SCTP_FUTURE_ASSOC; - if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || - params.sack_assoc_id == SCTP_ALL_ASSOC) { - if (params.sack_delay) { - sp->sackdelay = params.sack_delay; + if (params->sack_assoc_id == SCTP_FUTURE_ASSOC || + params->sack_assoc_id == SCTP_ALL_ASSOC) { + if (params->sack_delay) { + sp->sackdelay = params->sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } - if (params.sack_freq == 1) { + if (params->sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); - } else if (params.sack_freq > 1) { - sp->sackfreq = params.sack_freq; + } else if (params->sack_freq > 1) { + sp->sackfreq = params->sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } } - if (params.sack_assoc_id == SCTP_CURRENT_ASSOC || - params.sack_assoc_id == SCTP_ALL_ASSOC) + if (params->sack_assoc_id == SCTP_CURRENT_ASSOC || + params->sack_assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) - sctp_apply_asoc_delayed_ack(¶ms, asoc); + sctp_apply_asoc_delayed_ack(params, asoc); return 0; } @@ -4697,7 +4692,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_DELAYED_SACK: - retval = sctp_setsockopt_delayed_ack(sk, optval, optlen); + retval = sctp_setsockopt_delayed_ack(sk, kopt, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen); -- cgit From bb13d647d95bd0fdbc4882c27ea818b4c879263f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:46 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_partial_delivery_point Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2dfbd7f87991..deccfe54d2fc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3487,24 +3487,19 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk, * call as long as the user provided buffer is large enough to hold the * message. */ -static int sctp_setsockopt_partial_delivery_point(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_partial_delivery_point(struct sock *sk, u32 *val, unsigned int optlen) { - u32 val; - if (optlen != sizeof(u32)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; /* Note: We double the receive buffer from what the user sets * it to be, also initial rwnd is based on rcvbuf/2. */ - if (val > (sk->sk_rcvbuf >> 1)) + if (*val > (sk->sk_rcvbuf >> 1)) return -EINVAL; - sctp_sk(sk)->pd_point = val; + sctp_sk(sk)->pd_point = *val; return 0; /* is this the right error code? */ } @@ -4695,7 +4690,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_delayed_ack(sk, kopt, optlen); break; case SCTP_PARTIAL_DELIVERY_POINT: - retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen); + retval = sctp_setsockopt_partial_delivery_point(sk, kopt, optlen); break; case SCTP_INITMSG: -- cgit From 9dfa6f0494899e4643ec82c907a7664777262b8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:47 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_initmsg Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index deccfe54d2fc..d62c02d0b793 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2832,24 +2832,22 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, * by the change). With TCP-style sockets, this option is inherited by * sockets derived from a listener socket. */ -static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, unsigned int optlen) +static int sctp_setsockopt_initmsg(struct sock *sk, struct sctp_initmsg *sinit, + unsigned int optlen) { - struct sctp_initmsg sinit; struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof(struct sctp_initmsg)) return -EINVAL; - if (copy_from_user(&sinit, optval, optlen)) - return -EFAULT; - if (sinit.sinit_num_ostreams) - sp->initmsg.sinit_num_ostreams = sinit.sinit_num_ostreams; - if (sinit.sinit_max_instreams) - sp->initmsg.sinit_max_instreams = sinit.sinit_max_instreams; - if (sinit.sinit_max_attempts) - sp->initmsg.sinit_max_attempts = sinit.sinit_max_attempts; - if (sinit.sinit_max_init_timeo) - sp->initmsg.sinit_max_init_timeo = sinit.sinit_max_init_timeo; + if (sinit->sinit_num_ostreams) + sp->initmsg.sinit_num_ostreams = sinit->sinit_num_ostreams; + if (sinit->sinit_max_instreams) + sp->initmsg.sinit_max_instreams = sinit->sinit_max_instreams; + if (sinit->sinit_max_attempts) + sp->initmsg.sinit_max_attempts = sinit->sinit_max_attempts; + if (sinit->sinit_max_init_timeo) + sp->initmsg.sinit_max_init_timeo = sinit->sinit_max_init_timeo; return 0; } @@ -4694,7 +4692,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, break; case SCTP_INITMSG: - retval = sctp_setsockopt_initmsg(sk, optval, optlen); + retval = sctp_setsockopt_initmsg(sk, kopt, optlen); break; case SCTP_DEFAULT_SEND_PARAM: retval = sctp_setsockopt_default_send_param(sk, optval, -- cgit From c23ad6d2b71cf2731e162143a6665d6491766c61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:48 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_default_send_param Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 56 ++++++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d62c02d0b793..9bcbb065cf9e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2867,57 +2867,54 @@ static int sctp_setsockopt_initmsg(struct sock *sk, struct sctp_initmsg *sinit, * to this call if the caller is using the UDP model. */ static int sctp_setsockopt_default_send_param(struct sock *sk, - char __user *optval, + struct sctp_sndrcvinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_sndrcvinfo info; - if (optlen != sizeof(info)) + if (optlen != sizeof(*info)) return -EINVAL; - if (copy_from_user(&info, optval, optlen)) - return -EFAULT; - if (info.sinfo_flags & + if (info->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; - asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, info->sinfo_assoc_id); + if (!asoc && info->sinfo_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { - asoc->default_stream = info.sinfo_stream; - asoc->default_flags = info.sinfo_flags; - asoc->default_ppid = info.sinfo_ppid; - asoc->default_context = info.sinfo_context; - asoc->default_timetolive = info.sinfo_timetolive; + asoc->default_stream = info->sinfo_stream; + asoc->default_flags = info->sinfo_flags; + asoc->default_ppid = info->sinfo_ppid; + asoc->default_context = info->sinfo_context; + asoc->default_timetolive = info->sinfo_timetolive; return 0; } if (sctp_style(sk, TCP)) - info.sinfo_assoc_id = SCTP_FUTURE_ASSOC; + info->sinfo_assoc_id = SCTP_FUTURE_ASSOC; - if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || - info.sinfo_assoc_id == SCTP_ALL_ASSOC) { - sp->default_stream = info.sinfo_stream; - sp->default_flags = info.sinfo_flags; - sp->default_ppid = info.sinfo_ppid; - sp->default_context = info.sinfo_context; - sp->default_timetolive = info.sinfo_timetolive; + if (info->sinfo_assoc_id == SCTP_FUTURE_ASSOC || + info->sinfo_assoc_id == SCTP_ALL_ASSOC) { + sp->default_stream = info->sinfo_stream; + sp->default_flags = info->sinfo_flags; + sp->default_ppid = info->sinfo_ppid; + sp->default_context = info->sinfo_context; + sp->default_timetolive = info->sinfo_timetolive; } - if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC || - info.sinfo_assoc_id == SCTP_ALL_ASSOC) { + if (info->sinfo_assoc_id == SCTP_CURRENT_ASSOC || + info->sinfo_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { - asoc->default_stream = info.sinfo_stream; - asoc->default_flags = info.sinfo_flags; - asoc->default_ppid = info.sinfo_ppid; - asoc->default_context = info.sinfo_context; - asoc->default_timetolive = info.sinfo_timetolive; + asoc->default_stream = info->sinfo_stream; + asoc->default_flags = info->sinfo_flags; + asoc->default_ppid = info->sinfo_ppid; + asoc->default_context = info->sinfo_context; + asoc->default_timetolive = info->sinfo_timetolive; } } @@ -4695,8 +4692,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_initmsg(sk, kopt, optlen); break; case SCTP_DEFAULT_SEND_PARAM: - retval = sctp_setsockopt_default_send_param(sk, optval, - optlen); + retval = sctp_setsockopt_default_send_param(sk, kopt, optlen); break; case SCTP_DEFAULT_SNDINFO: retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen); -- cgit From 8a2409d3566b3965347d73b4ca008eaa76f4bbfc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:49 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_default_sndinfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9bcbb065cf9e..6695fa1cb0ca 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2925,54 +2925,51 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, * (SCTP_DEFAULT_SNDINFO) */ static int sctp_setsockopt_default_sndinfo(struct sock *sk, - char __user *optval, + struct sctp_sndinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_sndinfo info; - if (optlen != sizeof(info)) + if (optlen != sizeof(*info)) return -EINVAL; - if (copy_from_user(&info, optval, optlen)) - return -EFAULT; - if (info.snd_flags & + if (info->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | SCTP_ABORT | SCTP_EOF)) return -EINVAL; - asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, info->snd_assoc_id); + if (!asoc && info->snd_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { - asoc->default_stream = info.snd_sid; - asoc->default_flags = info.snd_flags; - asoc->default_ppid = info.snd_ppid; - asoc->default_context = info.snd_context; + asoc->default_stream = info->snd_sid; + asoc->default_flags = info->snd_flags; + asoc->default_ppid = info->snd_ppid; + asoc->default_context = info->snd_context; return 0; } if (sctp_style(sk, TCP)) - info.snd_assoc_id = SCTP_FUTURE_ASSOC; + info->snd_assoc_id = SCTP_FUTURE_ASSOC; - if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || - info.snd_assoc_id == SCTP_ALL_ASSOC) { - sp->default_stream = info.snd_sid; - sp->default_flags = info.snd_flags; - sp->default_ppid = info.snd_ppid; - sp->default_context = info.snd_context; + if (info->snd_assoc_id == SCTP_FUTURE_ASSOC || + info->snd_assoc_id == SCTP_ALL_ASSOC) { + sp->default_stream = info->snd_sid; + sp->default_flags = info->snd_flags; + sp->default_ppid = info->snd_ppid; + sp->default_context = info->snd_context; } - if (info.snd_assoc_id == SCTP_CURRENT_ASSOC || - info.snd_assoc_id == SCTP_ALL_ASSOC) { + if (info->snd_assoc_id == SCTP_CURRENT_ASSOC || + info->snd_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { - asoc->default_stream = info.snd_sid; - asoc->default_flags = info.snd_flags; - asoc->default_ppid = info.snd_ppid; - asoc->default_context = info.snd_context; + asoc->default_stream = info->snd_sid; + asoc->default_flags = info->snd_flags; + asoc->default_ppid = info->snd_ppid; + asoc->default_context = info->snd_context; } } @@ -4695,7 +4692,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_default_send_param(sk, kopt, optlen); break; case SCTP_DEFAULT_SNDINFO: - retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen); + retval = sctp_setsockopt_default_sndinfo(sk, kopt, optlen); break; case SCTP_PRIMARY_ADDR: retval = sctp_setsockopt_primary_addr(sk, optval, optlen); -- cgit From 1eec69580414bd9f9be00ab2cf7b69d44adcae17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:50 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_primary_addr Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6695fa1cb0ca..abdec7b412bc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2982,10 +2982,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, * the association primary. The enclosed address must be one of the * association peer's addresses. */ -static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval, +static int sctp_setsockopt_primary_addr(struct sock *sk, struct sctp_prim *prim, unsigned int optlen) { - struct sctp_prim prim; struct sctp_transport *trans; struct sctp_af *af; int err; @@ -2993,21 +2992,18 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, char __user *optval, if (optlen != sizeof(struct sctp_prim)) return -EINVAL; - if (copy_from_user(&prim, optval, sizeof(struct sctp_prim))) - return -EFAULT; - /* Allow security module to validate address but need address len. */ - af = sctp_get_af_specific(prim.ssp_addr.ss_family); + af = sctp_get_af_specific(prim->ssp_addr.ss_family); if (!af) return -EINVAL; err = security_sctp_bind_connect(sk, SCTP_PRIMARY_ADDR, - (struct sockaddr *)&prim.ssp_addr, + (struct sockaddr *)&prim->ssp_addr, af->sockaddr_len); if (err) return err; - trans = sctp_addr_id2transport(sk, &prim.ssp_addr, prim.ssp_assoc_id); + trans = sctp_addr_id2transport(sk, &prim->ssp_addr, prim->ssp_assoc_id); if (!trans) return -EINVAL; @@ -4695,7 +4691,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_default_sndinfo(sk, kopt, optlen); break; case SCTP_PRIMARY_ADDR: - retval = sctp_setsockopt_primary_addr(sk, optval, optlen); + retval = sctp_setsockopt_primary_addr(sk, kopt, optlen); break; case SCTP_SET_PEER_PRIMARY_ADDR: retval = sctp_setsockopt_peer_primary_addr(sk, optval, optlen); -- cgit From 46a0ae9de3186897211e04c2ed5d4a06b8b9e1cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:51 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_peer_primary_addr Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index abdec7b412bc..ff3b720eab0a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3284,12 +3284,12 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned * locally bound addresses. The following structure is used to make a * set primary request: */ -static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, +static int sctp_setsockopt_peer_primary_addr(struct sock *sk, + struct sctp_setpeerprim *prim, unsigned int optlen) { struct sctp_sock *sp; struct sctp_association *asoc = NULL; - struct sctp_setpeerprim prim; struct sctp_chunk *chunk; struct sctp_af *af; int err; @@ -3302,10 +3302,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva if (optlen != sizeof(struct sctp_setpeerprim)) return -EINVAL; - if (copy_from_user(&prim, optval, optlen)) - return -EFAULT; - - asoc = sctp_id2assoc(sk, prim.sspp_assoc_id); + asoc = sctp_id2assoc(sk, prim->sspp_assoc_id); if (!asoc) return -EINVAL; @@ -3318,26 +3315,26 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva if (!sctp_state(asoc, ESTABLISHED)) return -ENOTCONN; - af = sctp_get_af_specific(prim.sspp_addr.ss_family); + af = sctp_get_af_specific(prim->sspp_addr.ss_family); if (!af) return -EINVAL; - if (!af->addr_valid((union sctp_addr *)&prim.sspp_addr, sp, NULL)) + if (!af->addr_valid((union sctp_addr *)&prim->sspp_addr, sp, NULL)) return -EADDRNOTAVAIL; - if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim.sspp_addr)) + if (!sctp_assoc_lookup_laddr(asoc, (union sctp_addr *)&prim->sspp_addr)) return -EADDRNOTAVAIL; /* Allow security module to validate address. */ err = security_sctp_bind_connect(sk, SCTP_SET_PEER_PRIMARY_ADDR, - (struct sockaddr *)&prim.sspp_addr, + (struct sockaddr *)&prim->sspp_addr, af->sockaddr_len); if (err) return err; /* Create an ASCONF chunk with SET_PRIMARY parameter */ chunk = sctp_make_asconf_set_prim(asoc, - (union sctp_addr *)&prim.sspp_addr); + (union sctp_addr *)&prim->sspp_addr); if (!chunk) return -ENOMEM; @@ -4694,7 +4691,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_primary_addr(sk, kopt, optlen); break; case SCTP_SET_PEER_PRIMARY_ADDR: - retval = sctp_setsockopt_peer_primary_addr(sk, optval, optlen); + retval = sctp_setsockopt_peer_primary_addr(sk, kopt, optlen); break; case SCTP_NODELAY: retval = sctp_setsockopt_nodelay(sk, optval, optlen); -- cgit From f87ddbc0c0fbe1f0ebd368b4047c0af0250667c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:52 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_nodelay Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ff3b720eab0a..f9fe93e865b9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3020,17 +3020,12 @@ static int sctp_setsockopt_primary_addr(struct sock *sk, struct sctp_prim *prim, * introduced, at the cost of more packets in the network. Expects an * integer boolean flag. */ -static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval, +static int sctp_setsockopt_nodelay(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1; + sctp_sk(sk)->nodelay = (*val == 0) ? 0 : 1; return 0; } @@ -4694,7 +4689,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_peer_primary_addr(sk, kopt, optlen); break; case SCTP_NODELAY: - retval = sctp_setsockopt_nodelay(sk, optval, optlen); + retval = sctp_setsockopt_nodelay(sk, kopt, optlen); break; case SCTP_RTOINFO: retval = sctp_setsockopt_rtoinfo(sk, optval, optlen); -- cgit From af5ae60e426a44003003177b176d60173e98dfa4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:53 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_rtoinfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f9fe93e865b9..6339a08b62dd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3041,9 +3041,10 @@ static int sctp_setsockopt_nodelay(struct sock *sk, int *val, * be changed. * */ -static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigned int optlen) +static int sctp_setsockopt_rtoinfo(struct sock *sk, + struct sctp_rtoinfo *rtoinfo, + unsigned int optlen) { - struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; unsigned long rto_min, rto_max; struct sctp_sock *sp = sctp_sk(sk); @@ -3051,18 +3052,15 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; - if (copy_from_user(&rtoinfo, optval, optlen)) - return -EFAULT; - - asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); + asoc = sctp_id2assoc(sk, rtoinfo->srto_assoc_id); /* Set the values to the specific association */ - if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + if (!asoc && rtoinfo->srto_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; - rto_max = rtoinfo.srto_max; - rto_min = rtoinfo.srto_min; + rto_max = rtoinfo->srto_max; + rto_min = rtoinfo->srto_min; if (rto_max) rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; @@ -3078,17 +3076,17 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne return -EINVAL; if (asoc) { - if (rtoinfo.srto_initial != 0) + if (rtoinfo->srto_initial != 0) asoc->rto_initial = - msecs_to_jiffies(rtoinfo.srto_initial); + msecs_to_jiffies(rtoinfo->srto_initial); asoc->rto_max = rto_max; asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ - if (rtoinfo.srto_initial != 0) - sp->rtoinfo.srto_initial = rtoinfo.srto_initial; + if (rtoinfo->srto_initial != 0) + sp->rtoinfo.srto_initial = rtoinfo->srto_initial; sp->rtoinfo.srto_max = rto_max; sp->rtoinfo.srto_min = rto_min; } @@ -4692,7 +4690,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_nodelay(sk, kopt, optlen); break; case SCTP_RTOINFO: - retval = sctp_setsockopt_rtoinfo(sk, optval, optlen); + retval = sctp_setsockopt_rtoinfo(sk, kopt, optlen); break; case SCTP_ASSOCINFO: retval = sctp_setsockopt_associnfo(sk, optval, optlen); -- cgit From 5b864c8dab18ddc5b34d7e9f5117c7d5185d0d4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:54 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_associnfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6339a08b62dd..2a655c65e294 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3105,26 +3105,25 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, * See [SCTP] for more information. * */ -static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsigned int optlen) +static int sctp_setsockopt_associnfo(struct sock *sk, + struct sctp_assocparams *assocparams, + unsigned int optlen) { - struct sctp_assocparams assocparams; struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assocparams)) return -EINVAL; - if (copy_from_user(&assocparams, optval, optlen)) - return -EFAULT; - asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); + asoc = sctp_id2assoc(sk, assocparams->sasoc_assoc_id); - if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + if (!asoc && assocparams->sasoc_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; /* Set the values to the specific association */ if (asoc) { - if (assocparams.sasoc_asocmaxrxt != 0) { + if (assocparams->sasoc_asocmaxrxt != 0) { __u32 path_sum = 0; int paths = 0; struct sctp_transport *peer_addr; @@ -3141,24 +3140,25 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig * then one path. */ if (paths > 1 && - assocparams.sasoc_asocmaxrxt > path_sum) + assocparams->sasoc_asocmaxrxt > path_sum) return -EINVAL; - asoc->max_retrans = assocparams.sasoc_asocmaxrxt; + asoc->max_retrans = assocparams->sasoc_asocmaxrxt; } - if (assocparams.sasoc_cookie_life != 0) - asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life); + if (assocparams->sasoc_cookie_life != 0) + asoc->cookie_life = + ms_to_ktime(assocparams->sasoc_cookie_life); } else { /* Set the values to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); - if (assocparams.sasoc_asocmaxrxt != 0) + if (assocparams->sasoc_asocmaxrxt != 0) sp->assocparams.sasoc_asocmaxrxt = - assocparams.sasoc_asocmaxrxt; - if (assocparams.sasoc_cookie_life != 0) + assocparams->sasoc_asocmaxrxt; + if (assocparams->sasoc_cookie_life != 0) sp->assocparams.sasoc_cookie_life = - assocparams.sasoc_cookie_life; + assocparams->sasoc_cookie_life; } return 0; } @@ -4693,7 +4693,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_rtoinfo(sk, kopt, optlen); break; case SCTP_ASSOCINFO: - retval = sctp_setsockopt_associnfo(sk, optval, optlen); + retval = sctp_setsockopt_associnfo(sk, kopt, optlen); break; case SCTP_I_WANT_MAPPED_V4_ADDR: retval = sctp_setsockopt_mappedv4(sk, optval, optlen); -- cgit From ffc08f086a563cdd18f43edf574724f65e4bf5e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:55 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_mappedv4 Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2a655c65e294..5007af318e13 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3173,16 +3173,14 @@ static int sctp_setsockopt_associnfo(struct sock *sk, * addresses and a user will receive both PF_INET6 and PF_INET type * addresses on the socket. */ -static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsigned int optlen) +static int sctp_setsockopt_mappedv4(struct sock *sk, int *val, + unsigned int optlen) { - int val; struct sctp_sock *sp = sctp_sk(sk); if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - if (val) + if (*val) sp->v4mapped = 1; else sp->v4mapped = 0; @@ -4696,7 +4694,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_associnfo(sk, kopt, optlen); break; case SCTP_I_WANT_MAPPED_V4_ADDR: - retval = sctp_setsockopt_mappedv4(sk, optval, optlen); + retval = sctp_setsockopt_mappedv4(sk, kopt, optlen); break; case SCTP_MAXSEG: retval = sctp_setsockopt_maxseg(sk, optval, optlen); -- cgit From dcd0357580cde2d058b50037c38ec499e7745497 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:56 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_maxseg Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5007af318e13..cc7d5430ad88 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3215,11 +3215,13 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, int *val, * changed (effecting future associations only). * assoc_value: This parameter specifies the maximum size in bytes. */ -static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) +static int sctp_setsockopt_maxseg(struct sock *sk, + struct sctp_assoc_value *params, + unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_assoc_value params; struct sctp_association *asoc; + sctp_assoc_t assoc_id; int val; if (optlen == sizeof(int)) { @@ -3228,19 +3230,17 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(&val, optval, optlen)) - return -EFAULT; - params.assoc_id = SCTP_FUTURE_ASSOC; + assoc_id = SCTP_FUTURE_ASSOC; + val = *(int *)params; } else if (optlen == sizeof(struct sctp_assoc_value)) { - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - val = params.assoc_value; + assoc_id = params->assoc_id; + val = params->assoc_value; } else { return -EINVAL; } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, assoc_id); + if (!asoc && assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; @@ -4697,7 +4697,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_mappedv4(sk, kopt, optlen); break; case SCTP_MAXSEG: - retval = sctp_setsockopt_maxseg(sk, optval, optlen); + retval = sctp_setsockopt_maxseg(sk, kopt, optlen); break; case SCTP_ADAPTATION_LAYER: retval = sctp_setsockopt_adaptation_layer(sk, optval, optlen); -- cgit From 07e5035c6f1377e0f07d9dcaa1724c97ac56f543 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:57 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_adaptation_layer Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index cc7d5430ad88..b29452f58ff9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3336,17 +3336,14 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, return err; } -static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval, +static int sctp_setsockopt_adaptation_layer(struct sock *sk, + struct sctp_setadaptation *adapt, unsigned int optlen) { - struct sctp_setadaptation adaptation; - if (optlen != sizeof(struct sctp_setadaptation)) return -EINVAL; - if (copy_from_user(&adaptation, optval, optlen)) - return -EFAULT; - sctp_sk(sk)->adaptation_ind = adaptation.ssb_adaptation_ind; + sctp_sk(sk)->adaptation_ind = adapt->ssb_adaptation_ind; return 0; } @@ -4700,7 +4697,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_maxseg(sk, kopt, optlen); break; case SCTP_ADAPTATION_LAYER: - retval = sctp_setsockopt_adaptation_layer(sk, optval, optlen); + retval = sctp_setsockopt_adaptation_layer(sk, kopt, optlen); break; case SCTP_CONTEXT: retval = sctp_setsockopt_context(sk, optval, optlen); -- cgit From 722eca9ecac7f83a0378fc48aa1acd025d9dd1da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:58 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_context Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b29452f58ff9..2862047054d5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3362,40 +3362,38 @@ static int sctp_setsockopt_adaptation_layer(struct sock *sk, * received messages from the peer and does not effect the value that is * saved with outbound messages. */ -static int sctp_setsockopt_context(struct sock *sk, char __user *optval, +static int sctp_setsockopt_context(struct sock *sk, + struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_assoc_value params; struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assoc_value)) return -EINVAL; - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { - asoc->default_rcv_context = params.assoc_value; + asoc->default_rcv_context = params->assoc_value; return 0; } if (sctp_style(sk, TCP)) - params.assoc_id = SCTP_FUTURE_ASSOC; + params->assoc_id = SCTP_FUTURE_ASSOC; - if (params.assoc_id == SCTP_FUTURE_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) - sp->default_rcv_context = params.assoc_value; + if (params->assoc_id == SCTP_FUTURE_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) + sp->default_rcv_context = params->assoc_value; - if (params.assoc_id == SCTP_CURRENT_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) + if (params->assoc_id == SCTP_CURRENT_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) - asoc->default_rcv_context = params.assoc_value; + asoc->default_rcv_context = params->assoc_value; return 0; } @@ -4700,7 +4698,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_adaptation_layer(sk, kopt, optlen); break; case SCTP_CONTEXT: - retval = sctp_setsockopt_context(sk, optval, optlen); + retval = sctp_setsockopt_context(sk, kopt, optlen); break; case SCTP_FRAGMENT_INTERLEAVE: retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen); -- cgit From 1031cea0019ec967a260ad835c0a5fe03d1b93a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:21:59 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_fragment_interleave Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2862047054d5..874cec731530 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3422,18 +3422,13 @@ static int sctp_setsockopt_context(struct sock *sk, * application using the one to many model may become confused and act * incorrectly. */ -static int sctp_setsockopt_fragment_interleave(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_fragment_interleave(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (optlen != sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - sctp_sk(sk)->frag_interleave = !!val; + sctp_sk(sk)->frag_interleave = !!*val; if (!sctp_sk(sk)->frag_interleave) sctp_sk(sk)->ep->intl_enable = 0; @@ -4701,7 +4696,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_context(sk, kopt, optlen); break; case SCTP_FRAGMENT_INTERLEAVE: - retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen); + retval = sctp_setsockopt_fragment_interleave(sk, kopt, optlen); break; case SCTP_MAX_BURST: retval = sctp_setsockopt_maxburst(sk, optval, optlen); -- cgit From f5bee0adb17eaa2f106c895d198b1e8fca96f1de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:00 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_maxburst Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 874cec731530..ac7dff849290 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3482,12 +3482,13 @@ static int sctp_setsockopt_partial_delivery_point(struct sock *sk, u32 *val, * future associations inheriting the socket value. */ static int sctp_setsockopt_maxburst(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_assoc_value params; struct sctp_association *asoc; + sctp_assoc_t assoc_id; + u32 assoc_value; if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED @@ -3495,37 +3496,33 @@ static int sctp_setsockopt_maxburst(struct sock *sk, "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(¶ms.assoc_value, optval, optlen)) - return -EFAULT; - params.assoc_id = SCTP_FUTURE_ASSOC; + assoc_id = SCTP_FUTURE_ASSOC; + assoc_value = *((int *)params); } else if (optlen == sizeof(struct sctp_assoc_value)) { - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; + assoc_id = params->assoc_id; + assoc_value = params->assoc_value; } else return -EINVAL; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && - sctp_style(sk, UDP)) + asoc = sctp_id2assoc(sk, assoc_id); + if (!asoc && assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { - asoc->max_burst = params.assoc_value; + asoc->max_burst = assoc_value; return 0; } if (sctp_style(sk, TCP)) - params.assoc_id = SCTP_FUTURE_ASSOC; + assoc_id = SCTP_FUTURE_ASSOC; - if (params.assoc_id == SCTP_FUTURE_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) - sp->max_burst = params.assoc_value; + if (assoc_id == SCTP_FUTURE_ASSOC || assoc_id == SCTP_ALL_ASSOC) + sp->max_burst = assoc_value; - if (params.assoc_id == SCTP_CURRENT_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) + if (assoc_id == SCTP_CURRENT_ASSOC || assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &sp->ep->asocs, asocs) - asoc->max_burst = params.assoc_value; + asoc->max_burst = assoc_value; return 0; } @@ -4699,7 +4696,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_fragment_interleave(sk, kopt, optlen); break; case SCTP_MAX_BURST: - retval = sctp_setsockopt_maxburst(sk, optval, optlen); + retval = sctp_setsockopt_maxburst(sk, kopt, optlen); break; case SCTP_AUTH_CHUNK: retval = sctp_setsockopt_auth_chunk(sk, optval, optlen); -- cgit From 88266d31b819202ea6aa7d87a3e9aa4b6d6a046f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:01 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_auth_chunk Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ac7dff849290..f68aa3936df3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3535,21 +3535,18 @@ static int sctp_setsockopt_maxburst(struct sock *sk, * will only effect future associations on the socket. */ static int sctp_setsockopt_auth_chunk(struct sock *sk, - char __user *optval, + struct sctp_authchunk *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authchunk val; if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) return -EINVAL; - if (copy_from_user(&val, optval, optlen)) - return -EFAULT; - switch (val.sauth_chunk) { + switch (val->sauth_chunk) { case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: case SCTP_CID_SHUTDOWN_COMPLETE: @@ -3558,7 +3555,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, } /* add this chunk id to the endpoint */ - return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk); + return sctp_auth_ep_add_chunkid(ep, val->sauth_chunk); } /* @@ -4699,7 +4696,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_maxburst(sk, kopt, optlen); break; case SCTP_AUTH_CHUNK: - retval = sctp_setsockopt_auth_chunk(sk, optval, optlen); + retval = sctp_setsockopt_auth_chunk(sk, kopt, optlen); break; case SCTP_HMAC_IDENT: retval = sctp_setsockopt_hmac_ident(sk, optval, optlen); -- cgit From 3564ef442a174e42b58a215a55e4f0ef56b5fbb0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:02 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_hmac_ident Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f68aa3936df3..a573af7dfe41 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3565,13 +3565,11 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, * endpoint requires the peer to use. */ static int sctp_setsockopt_hmac_ident(struct sock *sk, - char __user *optval, + struct sctp_hmacalgo *hmacs, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_hmacalgo *hmacs; u32 idents; - int err; if (!ep->auth_enable) return -EACCES; @@ -3581,21 +3579,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) + SCTP_AUTH_NUM_HMACS * sizeof(u16)); - hmacs = memdup_user(optval, optlen); - if (IS_ERR(hmacs)) - return PTR_ERR(hmacs); - idents = hmacs->shmac_num_idents; if (idents == 0 || idents > SCTP_AUTH_NUM_HMACS || - (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) { - err = -EINVAL; - goto out; - } + (idents * sizeof(u16)) > (optlen - sizeof(struct sctp_hmacalgo))) + return -EINVAL; - err = sctp_auth_ep_set_hmacs(ep, hmacs); -out: - kfree(hmacs); - return err; + return sctp_auth_ep_set_hmacs(ep, hmacs); } /* @@ -4699,7 +4688,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_auth_chunk(sk, kopt, optlen); break; case SCTP_HMAC_IDENT: - retval = sctp_setsockopt_hmac_ident(sk, optval, optlen); + retval = sctp_setsockopt_hmac_ident(sk, kopt, optlen); break; case SCTP_AUTH_KEY: retval = sctp_setsockopt_auth_key(sk, optval, optlen); -- cgit From 89fae01eef8b1572c71d2cfcc3307c1f1e97b778 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:03 +0200 Subject: sctp: switch sctp_setsockopt_auth_key to use memzero_explicit Switch from kzfree to sctp_setsockopt_auth_key + kfree to prepare for moving the kfree to common code. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a573af7dfe41..365145746b55 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3649,7 +3649,8 @@ static int sctp_setsockopt_auth_key(struct sock *sk, } out: - kzfree(authkey); + memzero_explicit(authkey, optlen); + kfree(authkey); return ret; } -- cgit From 534d13d07e771d090d61cd98f96b2cbf29765cef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:04 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_auth_key Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Adapt sctp_setsockopt to use a kzfree for this case. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 365145746b55..b4dcccba5787 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3594,11 +3594,10 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, * association shared key. */ static int sctp_setsockopt_auth_key(struct sock *sk, - char __user *optval, + struct sctp_authkey *authkey, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkey *authkey; struct sctp_association *asoc; int ret = -EINVAL; @@ -3609,10 +3608,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk, */ optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey)); - authkey = memdup_user(optval, optlen); - if (IS_ERR(authkey)) - return PTR_ERR(authkey); - if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; @@ -3650,7 +3645,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk, out: memzero_explicit(authkey, optlen); - kfree(authkey); return ret; } @@ -4692,7 +4686,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_hmac_ident(sk, kopt, optlen); break; case SCTP_AUTH_KEY: - retval = sctp_setsockopt_auth_key(sk, optval, optlen); + retval = sctp_setsockopt_auth_key(sk, kopt, optlen); break; case SCTP_AUTH_ACTIVE_KEY: retval = sctp_setsockopt_active_key(sk, optval, optlen); -- cgit From dcab0a7a57a4c29e38af17c1106d2217e680995a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:05 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_active_key Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b4dcccba5787..02153f243683 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3655,42 +3655,39 @@ out: * the association shared key. */ static int sctp_setsockopt_active_key(struct sock *sk, - char __user *optval, + struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; - struct sctp_authkeyid val; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; - if (copy_from_user(&val, optval, optlen)) - return -EFAULT; - asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, val->scact_assoc_id); + if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) - return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + return sctp_auth_set_active_key(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) - val.scact_assoc_id = SCTP_FUTURE_ASSOC; + val->scact_assoc_id = SCTP_FUTURE_ASSOC; - if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { - ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_active_key(ep, asoc, val->scact_keynumber); if (ret) return ret; } - if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { + if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_set_active_key(ep, asoc, - val.scact_keynumber); + val->scact_keynumber); if (res && !ret) ret = res; @@ -4689,7 +4686,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_auth_key(sk, kopt, optlen); break; case SCTP_AUTH_ACTIVE_KEY: - retval = sctp_setsockopt_active_key(sk, optval, optlen); + retval = sctp_setsockopt_active_key(sk, kopt, optlen); break; case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); -- cgit From 97dc9f2e3e8bdea754f59bd788ce25a0cb11f00d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:06 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_del_key Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 02153f243683..b692b9376d9d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3703,42 +3703,39 @@ static int sctp_setsockopt_active_key(struct sock *sk, * This set option will delete a shared secret key from use. */ static int sctp_setsockopt_del_key(struct sock *sk, - char __user *optval, + struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; - struct sctp_authkeyid val; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; - if (copy_from_user(&val, optval, optlen)) - return -EFAULT; - asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, val->scact_assoc_id); + if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) - return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + return sctp_auth_del_key_id(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) - val.scact_assoc_id = SCTP_FUTURE_ASSOC; + val->scact_assoc_id = SCTP_FUTURE_ASSOC; - if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { - ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_del_key_id(ep, asoc, val->scact_keynumber); if (ret) return ret; } - if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { + if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_del_key_id(ep, asoc, - val.scact_keynumber); + val->scact_keynumber); if (res && !ret) ret = res; @@ -4689,7 +4686,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_active_key(sk, kopt, optlen); break; case SCTP_AUTH_DELETE_KEY: - retval = sctp_setsockopt_del_key(sk, optval, optlen); + retval = sctp_setsockopt_del_key(sk, kopt, optlen); break; case SCTP_AUTH_DEACTIVATE_KEY: retval = sctp_setsockopt_deactivate_key(sk, optval, optlen); -- cgit From 76b3d0c4455b61f74301a1aae2dcdc243921f379 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:07 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_deactivate_key Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b692b9376d9d..ab155c15939e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3750,42 +3750,40 @@ static int sctp_setsockopt_del_key(struct sock *sk, * * This set option will deactivate a shared secret key. */ -static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, +static int sctp_setsockopt_deactivate_key(struct sock *sk, + struct sctp_authkeyid *val, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_association *asoc; - struct sctp_authkeyid val; int ret = 0; if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; - if (copy_from_user(&val, optval, optlen)) - return -EFAULT; - asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, val->scact_assoc_id); + if (!asoc && val->scact_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) - return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + return sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber); if (sctp_style(sk, TCP)) - val.scact_assoc_id = SCTP_FUTURE_ASSOC; + val->scact_assoc_id = SCTP_FUTURE_ASSOC; - if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { - ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (val->scact_assoc_id == SCTP_FUTURE_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_deact_key_id(ep, asoc, val->scact_keynumber); if (ret) return ret; } - if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || - val.scact_assoc_id == SCTP_ALL_ASSOC) { + if (val->scact_assoc_id == SCTP_CURRENT_ASSOC || + val->scact_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &ep->asocs, asocs) { int res = sctp_auth_deact_key_id(ep, asoc, - val.scact_keynumber); + val->scact_keynumber); if (res && !ret) ret = res; @@ -4689,7 +4687,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_del_key(sk, kopt, optlen); break; case SCTP_AUTH_DEACTIVATE_KEY: - retval = sctp_setsockopt_deactivate_key(sk, optval, optlen); + retval = sctp_setsockopt_deactivate_key(sk, kopt, optlen); break; case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); -- cgit From c9abc2c1c2973a30750fa00bfc912b683d88d1bc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:08 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_auto_asconf Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ab155c15939e..64f2a967ddf5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3807,26 +3807,23 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, * Note. In this implementation, socket operation overrides default parameter * being set by sysctl as well as FreeBSD implementation */ -static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, +static int sctp_setsockopt_auto_asconf(struct sock *sk, int *val, unsigned int optlen) { - int val; struct sctp_sock *sp = sctp_sk(sk); if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - if (!sctp_is_ep_boundall(sk) && val) + if (!sctp_is_ep_boundall(sk) && *val) return -EINVAL; - if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) + if ((*val && sp->do_auto_asconf) || (!*val && !sp->do_auto_asconf)) return 0; spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock); - if (val == 0 && sp->do_auto_asconf) { + if (*val == 0 && sp->do_auto_asconf) { list_del(&sp->auto_asconf_list); sp->do_auto_asconf = 0; - } else if (val && !sp->do_auto_asconf) { + } else if (*val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; @@ -4690,7 +4687,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_deactivate_key(sk, kopt, optlen); break; case SCTP_AUTO_ASCONF: - retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); + retval = sctp_setsockopt_auto_asconf(sk, kopt, optlen); break; case SCTP_PEER_ADDR_THLDS: retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen, -- cgit From b0ac3bb89464f6c54d860052aa52774c1ab95436 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:09 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_paddr_thresholds Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 55 ++++++++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 64f2a967ddf5..ea027ea74ba5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3840,66 +3840,63 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, int *val, * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt */ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, - char __user *optval, + struct sctp_paddrthlds_v2 *val, unsigned int optlen, bool v2) { - struct sctp_paddrthlds_v2 val; struct sctp_transport *trans; struct sctp_association *asoc; int len; - len = v2 ? sizeof(val) : sizeof(struct sctp_paddrthlds); + len = v2 ? sizeof(*val) : sizeof(struct sctp_paddrthlds); if (optlen < len) return -EINVAL; - if (copy_from_user(&val, optval, len)) - return -EFAULT; - if (v2 && val.spt_pathpfthld > val.spt_pathcpthld) + if (v2 && val->spt_pathpfthld > val->spt_pathcpthld) return -EINVAL; - if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); + if (!sctp_is_any(sk, (const union sctp_addr *)&val->spt_address)) { + trans = sctp_addr_id2transport(sk, &val->spt_address, + val->spt_assoc_id); if (!trans) return -ENOENT; - if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; + if (val->spt_pathmaxrxt) + trans->pathmaxrxt = val->spt_pathmaxrxt; if (v2) - trans->ps_retrans = val.spt_pathcpthld; - trans->pf_retrans = val.spt_pathpfthld; + trans->ps_retrans = val->spt_pathcpthld; + trans->pf_retrans = val->spt_pathpfthld; return 0; } - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, val->spt_assoc_id); + if (!asoc && val->spt_assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { - if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; + if (val->spt_pathmaxrxt) + trans->pathmaxrxt = val->spt_pathmaxrxt; if (v2) - trans->ps_retrans = val.spt_pathcpthld; - trans->pf_retrans = val.spt_pathpfthld; + trans->ps_retrans = val->spt_pathcpthld; + trans->pf_retrans = val->spt_pathpfthld; } - if (val.spt_pathmaxrxt) - asoc->pathmaxrxt = val.spt_pathmaxrxt; + if (val->spt_pathmaxrxt) + asoc->pathmaxrxt = val->spt_pathmaxrxt; if (v2) - asoc->ps_retrans = val.spt_pathcpthld; - asoc->pf_retrans = val.spt_pathpfthld; + asoc->ps_retrans = val->spt_pathcpthld; + asoc->pf_retrans = val->spt_pathpfthld; } else { struct sctp_sock *sp = sctp_sk(sk); - if (val.spt_pathmaxrxt) - sp->pathmaxrxt = val.spt_pathmaxrxt; + if (val->spt_pathmaxrxt) + sp->pathmaxrxt = val->spt_pathmaxrxt; if (v2) - sp->ps_retrans = val.spt_pathcpthld; - sp->pf_retrans = val.spt_pathpfthld; + sp->ps_retrans = val->spt_pathcpthld; + sp->pf_retrans = val->spt_pathpfthld; } return 0; @@ -4690,11 +4687,11 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_auto_asconf(sk, kopt, optlen); break; case SCTP_PEER_ADDR_THLDS: - retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen, + retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen, false); break; case SCTP_PEER_ADDR_THLDS_V2: - retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen, + retval = sctp_setsockopt_paddr_thresholds(sk, kopt, optlen, true); break; case SCTP_RECVRCVINFO: -- cgit From a98af7c84ad95d45058050ad3b9bb0cd31d93614 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:10 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_recvrcvinfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ea027ea74ba5..ac56fb7eb394 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3902,18 +3902,13 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, return 0; } -static int sctp_setsockopt_recvrcvinfo(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_recvrcvinfo(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *) optval)) - return -EFAULT; - sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1; + sctp_sk(sk)->recvrcvinfo = (*val == 0) ? 0 : 1; return 0; } @@ -4695,7 +4690,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, true); break; case SCTP_RECVRCVINFO: - retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen); + retval = sctp_setsockopt_recvrcvinfo(sk, kopt, optlen); break; case SCTP_RECVNXTINFO: retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen); -- cgit From cfa6fde26642553fcb0cdbfefe80f075eb6173f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:11 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_recvnxtinfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ac56fb7eb394..7beb26e5749d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3913,18 +3913,13 @@ static int sctp_setsockopt_recvrcvinfo(struct sock *sk, int *val, return 0; } -static int sctp_setsockopt_recvnxtinfo(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_recvnxtinfo(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *) optval)) - return -EFAULT; - sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1; + sctp_sk(sk)->recvnxtinfo = (*val == 0) ? 0 : 1; return 0; } @@ -4693,7 +4688,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_recvrcvinfo(sk, kopt, optlen); break; case SCTP_RECVNXTINFO: - retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen); + retval = sctp_setsockopt_recvnxtinfo(sk, kopt, optlen); break; case SCTP_PR_SUPPORTED: retval = sctp_setsockopt_pr_supported(sk, optval, optlen); -- cgit From 4a97fa4f099b9ac82d0e0adbd2f63fccf3dd96fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:12 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_pr_supported Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7beb26e5749d..245186327896 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3925,24 +3925,20 @@ static int sctp_setsockopt_recvnxtinfo(struct sock *sk, int *val, } static int sctp_setsockopt_pr_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; - if (optlen != sizeof(params)) + if (optlen != sizeof(*params)) return -EINVAL; - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) return -EINVAL; - sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value; + sctp_sk(sk)->ep->prsctp_enable = !!params->assoc_value; return 0; } @@ -4691,7 +4687,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_recvnxtinfo(sk, kopt, optlen); break; case SCTP_PR_SUPPORTED: - retval = sctp_setsockopt_pr_supported(sk, optval, optlen); + retval = sctp_setsockopt_pr_supported(sk, kopt, optlen); break; case SCTP_DEFAULT_PRINFO: retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); -- cgit From ac37435bfe9e09fc107daf6eddb57831b5961708 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:13 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_default_prinfo Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 245186327896..0eeb6e6162ad 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3944,55 +3944,50 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, } static int sctp_setsockopt_default_prinfo(struct sock *sk, - char __user *optval, + struct sctp_default_prinfo *info, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen != sizeof(info)) - goto out; - - if (copy_from_user(&info, optval, sizeof(info))) { - retval = -EFAULT; + if (optlen != sizeof(*info)) goto out; - } - if (info.pr_policy & ~SCTP_PR_SCTP_MASK) + if (info->pr_policy & ~SCTP_PR_SCTP_MASK) goto out; - if (info.pr_policy == SCTP_PR_SCTP_NONE) - info.pr_value = 0; + if (info->pr_policy == SCTP_PR_SCTP_NONE) + info->pr_value = 0; - asoc = sctp_id2assoc(sk, info.pr_assoc_id); - if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, info->pr_assoc_id); + if (!asoc && info->pr_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) goto out; retval = 0; if (asoc) { - SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); - asoc->default_timetolive = info.pr_value; + SCTP_PR_SET_POLICY(asoc->default_flags, info->pr_policy); + asoc->default_timetolive = info->pr_value; goto out; } if (sctp_style(sk, TCP)) - info.pr_assoc_id = SCTP_FUTURE_ASSOC; + info->pr_assoc_id = SCTP_FUTURE_ASSOC; - if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || - info.pr_assoc_id == SCTP_ALL_ASSOC) { - SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); - sp->default_timetolive = info.pr_value; + if (info->pr_assoc_id == SCTP_FUTURE_ASSOC || + info->pr_assoc_id == SCTP_ALL_ASSOC) { + SCTP_PR_SET_POLICY(sp->default_flags, info->pr_policy); + sp->default_timetolive = info->pr_value; } - if (info.pr_assoc_id == SCTP_CURRENT_ASSOC || - info.pr_assoc_id == SCTP_ALL_ASSOC) { + if (info->pr_assoc_id == SCTP_CURRENT_ASSOC || + info->pr_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { - SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); - asoc->default_timetolive = info.pr_value; + SCTP_PR_SET_POLICY(asoc->default_flags, + info->pr_policy); + asoc->default_timetolive = info->pr_value; } } @@ -4690,7 +4685,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_pr_supported(sk, kopt, optlen); break; case SCTP_DEFAULT_PRINFO: - retval = sctp_setsockopt_default_prinfo(sk, optval, optlen); + retval = sctp_setsockopt_default_prinfo(sk, kopt, optlen); break; case SCTP_RECONFIG_SUPPORTED: retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen); -- cgit From 3f49f72035d5625c06a23d72c22feee7f0d2b068 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:14 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_reconfig_supported Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0eeb6e6162ad..270c5d53fbf7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3996,27 +3996,21 @@ out: } static int sctp_setsockopt_reconfig_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; - sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value; + sctp_sk(sk)->ep->reconf_enable = !!params->assoc_value; retval = 0; @@ -4688,7 +4682,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_default_prinfo(sk, kopt, optlen); break; case SCTP_RECONFIG_SUPPORTED: - retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen); + retval = sctp_setsockopt_reconfig_supported(sk, kopt, optlen); break; case SCTP_ENABLE_STREAM_RESET: retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); -- cgit From 356dc6f16a96615d89c8aab3ca18c5bc7c3d4baa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:15 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_enable_strreset Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 270c5d53fbf7..9899d208f40f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4019,48 +4019,42 @@ out: } static int sctp_setsockopt_enable_strreset(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - if (params.assoc_value & (~SCTP_ENABLE_STRRESET_MASK)) + if (params->assoc_value & (~SCTP_ENABLE_STRRESET_MASK)) goto out; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) goto out; retval = 0; if (asoc) { - asoc->strreset_enable = params.assoc_value; + asoc->strreset_enable = params->assoc_value; goto out; } if (sctp_style(sk, TCP)) - params.assoc_id = SCTP_FUTURE_ASSOC; + params->assoc_id = SCTP_FUTURE_ASSOC; - if (params.assoc_id == SCTP_FUTURE_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) - ep->strreset_enable = params.assoc_value; + if (params->assoc_id == SCTP_FUTURE_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) + ep->strreset_enable = params->assoc_value; - if (params.assoc_id == SCTP_CURRENT_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) + if (params->assoc_id == SCTP_CURRENT_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) list_for_each_entry(asoc, &ep->asocs, asocs) - asoc->strreset_enable = params.assoc_value; + asoc->strreset_enable = params->assoc_value; out: return retval; @@ -4685,7 +4679,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_reconfig_supported(sk, kopt, optlen); break; case SCTP_ENABLE_STREAM_RESET: - retval = sctp_setsockopt_enable_strreset(sk, optval, optlen); + retval = sctp_setsockopt_enable_strreset(sk, kopt, optlen); break; case SCTP_RESET_STREAMS: retval = sctp_setsockopt_reset_streams(sk, optval, optlen); -- cgit From d492243435bdffec229b1a9d694e5864e802833a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:16 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_reset_streams Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9899d208f40f..1365351fd2c8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4061,12 +4061,10 @@ out: } static int sctp_setsockopt_reset_streams(struct sock *sk, - char __user *optval, + struct sctp_reset_streams *params, unsigned int optlen) { - struct sctp_reset_streams *params; struct sctp_association *asoc; - int retval = -EINVAL; if (optlen < sizeof(*params)) return -EINVAL; @@ -4074,23 +4072,15 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(__u16) * sizeof(*params)); - params = memdup_user(optval, optlen); - if (IS_ERR(params)) - return PTR_ERR(params); - if (params->srs_number_streams * sizeof(__u16) > optlen - sizeof(*params)) - goto out; + return -EINVAL; asoc = sctp_id2assoc(sk, params->srs_assoc_id); if (!asoc) - goto out; - - retval = sctp_send_reset_streams(asoc, params); + return -EINVAL; -out: - kfree(params); - return retval; + return sctp_send_reset_streams(asoc, params); } static int sctp_setsockopt_reset_assoc(struct sock *sk, @@ -4682,7 +4672,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_enable_strreset(sk, kopt, optlen); break; case SCTP_RESET_STREAMS: - retval = sctp_setsockopt_reset_streams(sk, optval, optlen); + retval = sctp_setsockopt_reset_streams(sk, kopt, optlen); break; case SCTP_RESET_ASSOC: retval = sctp_setsockopt_reset_assoc(sk, optval, optlen); -- cgit From b97d20ce531e13ce6e65fc8bd2d726a0ec24b5a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:17 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_reset_assoc Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1365351fd2c8..f190f59f2959 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4083,30 +4083,19 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, return sctp_send_reset_streams(asoc, params); } -static int sctp_setsockopt_reset_assoc(struct sock *sk, - char __user *optval, +static int sctp_setsockopt_reset_assoc(struct sock *sk, sctp_assoc_t *associd, unsigned int optlen) { struct sctp_association *asoc; - sctp_assoc_t associd; - int retval = -EINVAL; - - if (optlen != sizeof(associd)) - goto out; - if (copy_from_user(&associd, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (optlen != sizeof(*associd)) + return -EINVAL; - asoc = sctp_id2assoc(sk, associd); + asoc = sctp_id2assoc(sk, *associd); if (!asoc) - goto out; - - retval = sctp_send_reset_assoc(asoc); + return -EINVAL; -out: - return retval; + return sctp_send_reset_assoc(asoc); } static int sctp_setsockopt_add_streams(struct sock *sk, @@ -4675,7 +4664,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_reset_streams(sk, kopt, optlen); break; case SCTP_RESET_ASSOC: - retval = sctp_setsockopt_reset_assoc(sk, optval, optlen); + retval = sctp_setsockopt_reset_assoc(sk, kopt, optlen); break; case SCTP_ADD_STREAMS: retval = sctp_setsockopt_add_streams(sk, optval, optlen); -- cgit From 4d6fb2606252a4a001388f52808edbf3c624d033 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:18 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_add_streams Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f190f59f2959..4dedb94bca77 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4099,29 +4099,19 @@ static int sctp_setsockopt_reset_assoc(struct sock *sk, sctp_assoc_t *associd, } static int sctp_setsockopt_add_streams(struct sock *sk, - char __user *optval, + struct sctp_add_streams *params, unsigned int optlen) { struct sctp_association *asoc; - struct sctp_add_streams params; - int retval = -EINVAL; - - if (optlen != sizeof(params)) - goto out; - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (optlen != sizeof(*params)) + return -EINVAL; - asoc = sctp_id2assoc(sk, params.sas_assoc_id); + asoc = sctp_id2assoc(sk, params->sas_assoc_id); if (!asoc) - goto out; - - retval = sctp_send_add_streams(asoc, ¶ms); + return -EINVAL; -out: - return retval; + return sctp_send_add_streams(asoc, params); } static int sctp_setsockopt_scheduler(struct sock *sk, @@ -4667,7 +4657,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_reset_assoc(sk, kopt, optlen); break; case SCTP_ADD_STREAMS: - retval = sctp_setsockopt_add_streams(sk, optval, optlen); + retval = sctp_setsockopt_add_streams(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER: retval = sctp_setsockopt_scheduler(sk, optval, optlen); -- cgit From 4d2fba3a7e7bda272447bf77377bf330f8a83ec7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:19 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_scheduler Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4dedb94bca77..a7d6a66fbe21 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4115,44 +4115,39 @@ static int sctp_setsockopt_add_streams(struct sock *sk, } static int sctp_setsockopt_scheduler(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_assoc_value params; int retval = 0; - if (optlen < sizeof(params)) + if (optlen < sizeof(*params)) return -EINVAL; - optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) - return -EFAULT; - - if (params.assoc_value > SCTP_SS_MAX) + if (params->assoc_value > SCTP_SS_MAX) return -EINVAL; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) - return sctp_sched_set_sched(asoc, params.assoc_value); + return sctp_sched_set_sched(asoc, params->assoc_value); if (sctp_style(sk, TCP)) - params.assoc_id = SCTP_FUTURE_ASSOC; + params->assoc_id = SCTP_FUTURE_ASSOC; - if (params.assoc_id == SCTP_FUTURE_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) - sp->default_ss = params.assoc_value; + if (params->assoc_id == SCTP_FUTURE_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) + sp->default_ss = params->assoc_value; - if (params.assoc_id == SCTP_CURRENT_ASSOC || - params.assoc_id == SCTP_ALL_ASSOC) { + if (params->assoc_id == SCTP_CURRENT_ASSOC || + params->assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { int ret = sctp_sched_set_sched(asoc, - params.assoc_value); + params->assoc_value); if (ret && !retval) retval = ret; @@ -4660,7 +4655,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_add_streams(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER: - retval = sctp_setsockopt_scheduler(sk, optval, optlen); + retval = sctp_setsockopt_scheduler(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); -- cgit From d636e7f31fcc986525cb853f0fabccc2408f0840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:20 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_scheduler_value Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a7d6a66fbe21..4b660e6bf3bb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4158,38 +4158,32 @@ static int sctp_setsockopt_scheduler(struct sock *sk, } static int sctp_setsockopt_scheduler_value(struct sock *sk, - char __user *optval, + struct sctp_stream_value *params, unsigned int optlen) { - struct sctp_stream_value params; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen < sizeof(params)) - goto out; - - optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen < sizeof(*params)) goto out; - } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_CURRENT_ASSOC && sctp_style(sk, UDP)) goto out; if (asoc) { - retval = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + retval = sctp_sched_set_value(asoc, params->stream_id, + params->stream_value, GFP_KERNEL); goto out; } retval = 0; list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { - int ret = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + int ret = sctp_sched_set_value(asoc, params->stream_id, + params->stream_value, + GFP_KERNEL); if (ret && !retval) /* try to return the 1st error. */ retval = ret; } @@ -4658,7 +4652,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_scheduler(sk, kopt, optlen); break; case SCTP_STREAM_SCHEDULER_VALUE: - retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); + retval = sctp_setsockopt_scheduler_value(sk, kopt, optlen); break; case SCTP_INTERLEAVING_SUPPORTED: retval = sctp_setsockopt_interleaving_supported(sk, optval, -- cgit From 5b8d3b24467a41388097411ac0b2fc1f4b10bbf5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:21 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_interleaving_supported Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4b660e6bf3bb..6232e46c4cde 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4193,39 +4193,25 @@ out: } static int sctp_setsockopt_interleaving_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *p, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct sctp_assoc_value params; struct sctp_association *asoc; - int retval = -EINVAL; - if (optlen < sizeof(params)) - goto out; - - optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (optlen < sizeof(*p)) + return -EINVAL; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && - sctp_style(sk, UDP)) - goto out; + asoc = sctp_id2assoc(sk, p->assoc_id); + if (!asoc && p->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) + return -EINVAL; if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { - retval = -EPERM; - goto out; + return -EPERM; } - sp->ep->intl_enable = !!params.assoc_value; - - retval = 0; - -out: - return retval; + sp->ep->intl_enable = !!p->assoc_value; + return 0; } static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, @@ -4655,7 +4641,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_scheduler_value(sk, kopt, optlen); break; case SCTP_INTERLEAVING_SUPPORTED: - retval = sctp_setsockopt_interleaving_supported(sk, optval, + retval = sctp_setsockopt_interleaving_supported(sk, kopt, optlen); break; case SCTP_REUSE_PORT: -- cgit From a42624669e1afd183a82dbfe7cc29cc1a403c1fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:22 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_reuse_port Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6232e46c4cde..a5b95e68cc12 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4214,11 +4214,9 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk, return 0; } -static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, +static int sctp_setsockopt_reuse_port(struct sock *sk, int *val, unsigned int optlen) { - int val; - if (!sctp_style(sk, TCP)) return -EOPNOTSUPP; @@ -4228,10 +4226,7 @@ static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - - sctp_sk(sk)->reuse = !!val; + sctp_sk(sk)->reuse = !!*val; return 0; } @@ -4645,7 +4640,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, optlen); break; case SCTP_REUSE_PORT: - retval = sctp_setsockopt_reuse_port(sk, optval, optlen); + retval = sctp_setsockopt_reuse_port(sk, kopt, optlen); break; case SCTP_EVENT: retval = sctp_setsockopt_event(sk, optval, optlen); -- cgit From 565059cb9b24bd7804948802ef38cc99c35c2c64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:23 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_event Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a5b95e68cc12..12a33fc42eee 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4252,45 +4252,40 @@ static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, return 0; } -static int sctp_setsockopt_event(struct sock *sk, char __user *optval, +static int sctp_setsockopt_event(struct sock *sk, struct sctp_event *param, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_event param; int retval = 0; - if (optlen < sizeof(param)) + if (optlen < sizeof(*param)) return -EINVAL; - optlen = sizeof(param); - if (copy_from_user(¶m, optval, optlen)) - return -EFAULT; - - if (param.se_type < SCTP_SN_TYPE_BASE || - param.se_type > SCTP_SN_TYPE_MAX) + if (param->se_type < SCTP_SN_TYPE_BASE || + param->se_type > SCTP_SN_TYPE_MAX) return -EINVAL; - asoc = sctp_id2assoc(sk, param.se_assoc_id); - if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC && + asoc = sctp_id2assoc(sk, param->se_assoc_id); + if (!asoc && param->se_assoc_id > SCTP_ALL_ASSOC && sctp_style(sk, UDP)) return -EINVAL; if (asoc) - return sctp_assoc_ulpevent_type_set(¶m, asoc); + return sctp_assoc_ulpevent_type_set(param, asoc); if (sctp_style(sk, TCP)) - param.se_assoc_id = SCTP_FUTURE_ASSOC; + param->se_assoc_id = SCTP_FUTURE_ASSOC; - if (param.se_assoc_id == SCTP_FUTURE_ASSOC || - param.se_assoc_id == SCTP_ALL_ASSOC) + if (param->se_assoc_id == SCTP_FUTURE_ASSOC || + param->se_assoc_id == SCTP_ALL_ASSOC) sctp_ulpevent_type_set(&sp->subscribe, - param.se_type, param.se_on); + param->se_type, param->se_on); - if (param.se_assoc_id == SCTP_CURRENT_ASSOC || - param.se_assoc_id == SCTP_ALL_ASSOC) { + if (param->se_assoc_id == SCTP_CURRENT_ASSOC || + param->se_assoc_id == SCTP_ALL_ASSOC) { list_for_each_entry(asoc, &sp->ep->asocs, asocs) { - int ret = sctp_assoc_ulpevent_type_set(¶m, asoc); + int ret = sctp_assoc_ulpevent_type_set(param, asoc); if (ret && !retval) retval = ret; @@ -4643,7 +4638,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_reuse_port(sk, kopt, optlen); break; case SCTP_EVENT: - retval = sctp_setsockopt_event(sk, optval, optlen); + retval = sctp_setsockopt_event(sk, kopt, optlen); break; case SCTP_ASCONF_SUPPORTED: retval = sctp_setsockopt_asconf_supported(sk, optval, optlen); -- cgit From 9263ac97af46e0f31ba38d730e45c64dc701954e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:24 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_event Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 12a33fc42eee..b52acbf231ab 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4296,29 +4296,23 @@ static int sctp_setsockopt_event(struct sock *sk, struct sctp_event *param, } static int sctp_setsockopt_asconf_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; struct sctp_endpoint *ep; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; ep = sctp_sk(sk)->ep; - ep->asconf_enable = !!params.assoc_value; + ep->asconf_enable = !!params->assoc_value; if (ep->asconf_enable && ep->auth_enable) { sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); @@ -4641,7 +4635,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_event(sk, kopt, optlen); break; case SCTP_ASCONF_SUPPORTED: - retval = sctp_setsockopt_asconf_supported(sk, optval, optlen); + retval = sctp_setsockopt_asconf_supported(sk, kopt, optlen); break; case SCTP_AUTH_SUPPORTED: retval = sctp_setsockopt_auth_supported(sk, optval, optlen); -- cgit From 963855a938cad73b950fb42fa5376e8ab3fd1475 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:25 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_auth_supported Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b52acbf231ab..ca369447237a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4326,29 +4326,23 @@ out: } static int sctp_setsockopt_auth_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; struct sctp_endpoint *ep; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; ep = sctp_sk(sk)->ep; - if (params.assoc_value) { + if (params->assoc_value) { retval = sctp_auth_init(ep, GFP_KERNEL); if (retval) goto out; @@ -4358,7 +4352,7 @@ static int sctp_setsockopt_auth_supported(struct sock *sk, } } - ep->auth_enable = !!params.assoc_value; + ep->auth_enable = !!params->assoc_value; retval = 0; out: @@ -4638,7 +4632,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_asconf_supported(sk, kopt, optlen); break; case SCTP_AUTH_SUPPORTED: - retval = sctp_setsockopt_auth_supported(sk, optval, optlen); + retval = sctp_setsockopt_auth_supported(sk, kopt, optlen); break; case SCTP_ECN_SUPPORTED: retval = sctp_setsockopt_ecn_supported(sk, optval, optlen); -- cgit From 92c4f172552a37f1dbc385615b4497bd15df1d8f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:26 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_ecn_supported Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca369447237a..9f7f9aa50d4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4360,27 +4360,21 @@ out: } static int sctp_setsockopt_ecn_supported(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; - sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value; + sctp_sk(sk)->ep->ecn_enable = !!params->assoc_value; retval = 0; out: @@ -4635,7 +4629,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_auth_supported(sk, kopt, optlen); break; case SCTP_ECN_SUPPORTED: - retval = sctp_setsockopt_ecn_supported(sk, optval, optlen); + retval = sctp_setsockopt_ecn_supported(sk, kopt, optlen); break; case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: retval = sctp_setsockopt_pf_expose(sk, optval, optlen); -- cgit From 26feba8090773550f255aed911d8f824ca42b7eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:27 +0200 Subject: sctp: pass a kernel pointer to sctp_setsockopt_pf_expose Use the kernel pointer that sctp_setsockopt has available instead of directly handling the user pointer. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9f7f9aa50d4b..f2d4f8a0c426 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4382,33 +4382,27 @@ out: } static int sctp_setsockopt_pf_expose(struct sock *sk, - char __user *optval, + struct sctp_assoc_value *params, unsigned int optlen) { - struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; - if (optlen != sizeof(params)) - goto out; - - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; + if (optlen != sizeof(*params)) goto out; - } - if (params.assoc_value > SCTP_PF_EXPOSE_MAX) + if (params->assoc_value > SCTP_PF_EXPOSE_MAX) goto out; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + asoc = sctp_id2assoc(sk, params->assoc_id); + if (!asoc && params->assoc_id != SCTP_FUTURE_ASSOC && sctp_style(sk, UDP)) goto out; if (asoc) - asoc->pf_expose = params.assoc_value; + asoc->pf_expose = params->assoc_value; else - sctp_sk(sk)->pf_expose = params.assoc_value; + sctp_sk(sk)->pf_expose = params->assoc_value; retval = 0; out: @@ -4632,7 +4626,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, retval = sctp_setsockopt_ecn_supported(sk, kopt, optlen); break; case SCTP_EXPOSE_POTENTIALLY_FAILED_STATE: - retval = sctp_setsockopt_pf_expose(sk, optval, optlen); + retval = sctp_setsockopt_pf_expose(sk, kopt, optlen); break; default: retval = -ENOPROTOOPT; -- cgit From 6c8983a606622b61a429830091fdfe643328b96a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jul 2020 09:22:28 +0200 Subject: sctp: remove the out_nounlock label in sctp_setsockopt This is just used once, and a direct return for the redirect to the AF case is much easier to follow than jumping to the end of a very long function. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f2d4f8a0c426..9a767f359718 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4444,8 +4444,8 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, */ if (level != SOL_SCTP) { struct sctp_af *af = sctp_sk(sk)->pf->af; - retval = af->setsockopt(sk, level, optname, optval, optlen); - goto out_nounlock; + + return af->setsockopt(sk, level, optname, optval, optlen); } if (optlen > 0) { @@ -4635,8 +4635,6 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, release_sock(sk); kfree(kopt); - -out_nounlock: return retval; } -- cgit From eba75c587e811d3249c8bd50d22bb2266ccd3c0f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 10 Jul 2020 09:29:02 -0400 Subject: icmp: support rfc 4884 Add setsockopt SOL_IP/IP_RECVERR_4884 to return the offset to an extension struct if present. ICMP messages may include an extension structure after the original datagram. RFC 4884 standardized this behavior. It stores the offset in words to the extension header in u8 icmphdr.un.reserved[1]. The field is valid only for ICMP types destination unreachable, time exceeded and parameter problem, if length is at least 128 bytes and entire packet does not exceed 576 bytes. Return the offset to the start of the extension struct when reading an ICMP error from the error queue, if it matches the above constraints. Do not return the raw u8 field. Return the offset from the start of the user buffer, in bytes. The kernel does not return the network and transport headers, so subtract those. Also validate the headers. Return the offset regardless of validation, as an invalid extension must still not be misinterpreted as part of the original datagram. Note that !invalid does not imply valid. If the extension version does not match, no validation can take place, for instance. For backward compatibility, make this optional, set by setsockopt SOL_IP/IP_RECVERR_RFC4884. For API example and feature test, see github.com/wdebruij/kerneltools/blob/master/tests/recv_icmp_v2.c For forward compatibility, reserve only setsockopt value 1, leaving other bits for additional icmp extensions. Changes v1->v2: - convert word offset to byte offset from start of user buffer - return in ee_data as u8 may be insufficient - define extension struct and object header structs - return len only if constraints met - if returning len, also validate Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_sockglue.c | 12 +++++++++ 2 files changed, 83 insertions(+) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e30515f89802..793aebf07c2a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1116,6 +1116,77 @@ error: goto drop; } +static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off) +{ + struct icmp_extobj_hdr *objh, _objh; + struct icmp_ext_hdr *exth, _exth; + u16 olen; + + exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth); + if (!exth) + return false; + if (exth->version != 2) + return true; + + if (exth->checksum && + csum_fold(skb_checksum(skb, off, skb->len - off, 0))) + return false; + + off += sizeof(_exth); + while (off < skb->len) { + objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh); + if (!objh) + return false; + + olen = ntohs(objh->length); + if (olen < sizeof(_objh)) + return false; + + off += olen; + if (off > skb->len) + return false; + } + + return true; +} + +void ip_icmp_error_rfc4884(const struct sk_buff *skb, + struct sock_ee_data_rfc4884 *out) +{ + int hlen, off; + + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return; + } + + /* outer headers up to inner iph. skb->data is at inner payload */ + hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr); + + /* per rfc 791: maximum packet length of 576 bytes */ + if (hlen + skb->len > 576) + return; + + /* per rfc 4884: minimal datagram length of 128 bytes */ + off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32); + if (off < 128) + return; + + /* kernel has stripped headers: return payload offset in bytes */ + off -= hlen; + if (off + sizeof(struct icmp_ext_hdr) > skb->len) + return; + + out->len = off; + + if (!ip_icmp_error_rfc4884_validate(skb, off)) + out->flags |= SO_EE_RFC4884_FLAG_INVALID; +} + int icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 86b3b9a7cea3..a5ea02d7a183 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -411,6 +411,9 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, serr->port = port; if (skb_pull(skb, payload - skb->data)) { + if (inet_sk(sk)->recverr_rfc4884) + ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); + skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) return; @@ -904,6 +907,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_RECVORIGDSTADDR: case IP_CHECKSUM: case IP_RECVFRAGSIZE: + case IP_RECVERR_RFC4884: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -1063,6 +1067,11 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (!val) skb_queue_purge(&sk->sk_error_queue); break; + case IP_RECVERR_RFC4884: + if (val < 0 || val > 1) + goto e_inval; + inet->recverr_rfc4884 = !!val; + break; case IP_MULTICAST_TTL: if (sk->sk_type == SOCK_STREAM) goto e_inval; @@ -1611,6 +1620,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVERR: val = inet->recverr; break; + case IP_RECVERR_RFC4884: + val = inet->recverr_rfc4884; + break; case IP_MULTICAST_TTL: val = inet->mc_ttl; break; -- cgit From aad74d849dd56a04ccb289f7c61909a68337ce5e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 19 Jul 2020 20:49:51 -0700 Subject: net: Wrap ndo_do_ioctl() to prepare for DSA stacked ops In preparation for adding another layer of call into a DSA stacked ops singleton, wrap the ndo_do_ioctl() call into dev_do_ioctl(). Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 547b587c1950..a213c703c90a 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -225,6 +225,22 @@ static int net_hwtstamp_validate(struct ifreq *ifr) return 0; } +static int dev_do_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int err = -EOPNOTSUPP; + + if (ops->ndo_do_ioctl) { + if (netif_device_present(dev)) + err = ops->ndo_do_ioctl(dev, ifr, cmd); + else + err = -ENODEV; + } + + return err; +} + /* * Perform the SIOCxIFxxx calls, inside rtnl_lock() */ @@ -323,13 +339,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSHWTSTAMP || cmd == SIOCGHWTSTAMP || cmd == SIOCWANDEV) { - err = -EOPNOTSUPP; - if (ops->ndo_do_ioctl) { - if (netif_device_present(dev)) - err = ops->ndo_do_ioctl(dev, ifr, cmd); - else - err = -ENODEV; - } + err = dev_do_ioctl(dev, ifr, cmd); } else err = -EINVAL; -- cgit From 3369afba1e46b0f7f33b4853a9d06616be6e22d5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 19 Jul 2020 20:49:53 -0700 Subject: net: Call into DSA netdevice_ops wrappers Make the core net_device code call into our ndo_do_ioctl() and ndo_get_phys_port_name() functions via the wrappers defined previously Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ net/core/dev_ioctl.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 062a00fdca9b..19f1abc26fcd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -8602,6 +8603,10 @@ int dev_get_phys_port_name(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; int err; + err = dsa_ndo_get_phys_port_name(dev, name, len); + if (err == 0 || err != -EOPNOTSUPP) + return err; + if (ops->ndo_get_phys_port_name) { err = ops->ndo_get_phys_port_name(dev, name, len); if (err != -EOPNOTSUPP) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index a213c703c90a..b2cf9b7bb7b8 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -231,6 +232,10 @@ static int dev_do_ioctl(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; int err = -EOPNOTSUPP; + err = dsa_ndo_do_ioctl(dev, ifr, cmd); + if (err == 0 || err != -EOPNOTSUPP) + return err; + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) err = ops->ndo_do_ioctl(dev, ifr, cmd); -- cgit From 9c0c7014f38206a2b63e7e832edf2e881a7b49ad Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 19 Jul 2020 20:49:54 -0700 Subject: net: dsa: Setup dsa_netdev_ops Now that we have all the infrastructure in place for calling into the dsa_ptr->netdev_ops function pointers, install them when we configure the DSA CPU/management interface and tear them down. The flow is unchanged from before, but now we preserve equality of tests when network device drivers do tests like dev->netdev_ops == &foo_ops which was not the case before since we were allocating an entirely new structure. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/master.c | 52 +++++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/dsa/master.c b/net/dsa/master.c index 480a61460c23..0a90911ae31b 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -220,12 +220,17 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } - if (cpu_dp->orig_ndo_ops && cpu_dp->orig_ndo_ops->ndo_do_ioctl) - err = cpu_dp->orig_ndo_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) + err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); return err; } +static const struct dsa_netdevice_ops dsa_netdev_ops = { + .ndo_do_ioctl = dsa_master_ioctl, + .ndo_get_phys_port_name = dsa_master_get_phys_port_name, +}; + static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -260,38 +265,10 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } -static int dsa_master_ndo_setup(struct net_device *dev) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - struct dsa_switch *ds = cpu_dp->ds; - struct net_device_ops *ops; - - if (dev->netdev_ops->ndo_get_phys_port_name) - return 0; - - ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); - if (!ops) - return -ENOMEM; - - cpu_dp->orig_ndo_ops = dev->netdev_ops; - if (cpu_dp->orig_ndo_ops) - memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops)); - - ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name; - ops->ndo_do_ioctl = dsa_master_ioctl; - - dev->netdev_ops = ops; - - return 0; -} - -static void dsa_master_ndo_teardown(struct net_device *dev) +static void dsa_netdev_ops_set(struct net_device *dev, + const struct dsa_netdevice_ops *ops) { - struct dsa_port *cpu_dp = dev->dsa_ptr; - - if (cpu_dp->orig_ndo_ops) - dev->netdev_ops = cpu_dp->orig_ndo_ops; - cpu_dp->orig_ndo_ops = NULL; + dev->dsa_ptr->netdev_ops = ops; } static ssize_t tagging_show(struct device *d, struct device_attribute *attr, @@ -353,9 +330,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) if (ret) return ret; - ret = dsa_master_ndo_setup(dev); - if (ret) - goto out_err_ethtool_teardown; + dsa_netdev_ops_set(dev, &dsa_netdev_ops); ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) @@ -364,8 +339,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) return ret; out_err_ndo_teardown: - dsa_master_ndo_teardown(dev); -out_err_ethtool_teardown: + dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); return ret; } @@ -373,7 +347,7 @@ out_err_ethtool_teardown: void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); - dsa_master_ndo_teardown(dev); + dsa_netdev_ops_set(dev, NULL); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev); -- cgit From a8b7b2d0b3fc965d823e362840e5451e2eb4a71b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 20 Jul 2020 10:10:41 +0200 Subject: sched: sch_api: add missing rcu read lock to silence the warning In case the qdisc_match_from_root function() is called from non-rcu path with rtnl mutex held, a suspiciout rcu usage warning appears: [ 241.504354] ============================= [ 241.504358] WARNING: suspicious RCU usage [ 241.504366] 5.8.0-rc4-custom-01521-g72a7c7d549c3 #32 Not tainted [ 241.504370] ----------------------------- [ 241.504378] net/sched/sch_api.c:270 RCU-list traversed in non-reader section!! [ 241.504382] other info that might help us debug this: [ 241.504388] rcu_scheduler_active = 2, debug_locks = 1 [ 241.504394] 1 lock held by tc/1391: [ 241.504398] #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 [ 241.504431] stack backtrace: [ 241.504440] CPU: 0 PID: 1391 Comm: tc Not tainted 5.8.0-rc4-custom-01521-g72a7c7d549c3 #32 [ 241.504446] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 [ 241.504453] Call Trace: [ 241.504465] dump_stack+0x100/0x184 [ 241.504482] lockdep_rcu_suspicious+0x153/0x15d [ 241.504499] qdisc_match_from_root+0x293/0x350 Fix this by passing the rtnl held lockdep condition down to hlist_for_each_entry_rcu() Reported-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 11ebba60da3b..2a76a2f5ed88 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -267,7 +267,8 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle, + lockdep_rtnl_is_held()) { if (q->handle == handle) return q; } -- cgit From 2b96692bcfcd2be7c9aa55dc13440213cd54c654 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 18 Jul 2020 20:53:38 +0800 Subject: net: hsr: remove redundant null check Because kfree_skb already checked NULL skb parameter, so the additional checks are unnecessary, just remove them. Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- net/hsr/hsr_forward.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ed13760463de..92c8ad75200b 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -367,10 +367,8 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) port->dev->stats.tx_bytes += skb->len; } - if (frame.skb_hsr) - kfree_skb(frame.skb_hsr); - if (frame.skb_std) - kfree_skb(frame.skb_std); + kfree_skb(frame.skb_hsr); + kfree_skb(frame.skb_std); return; out_drop: -- cgit From 71d4364abdc50cb1f0ff5af0f932b110278f620c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 18 Jul 2020 21:04:18 +0300 Subject: net: dsa: use the ETH_MIN_MTU and ETH_DATA_LEN default values Now that DSA supports MTU configuration, undo the effects of commit 8b1efc0f83f1 ("net: remove MTU limits on a few ether_setup callers") and let DSA interfaces use the default min_mtu and max_mtu specified by ether_setup(). This is more important for min_mtu: since DSA is Ethernet, the minimum MTU is the same as of any other Ethernet interface, and definitely not zero. For the max_mtu, we have a callback through which drivers can override that, if they want to. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e147e10b411c..41d60eeefdbd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1754,11 +1754,8 @@ int dsa_slave_create(struct dsa_port *port) eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->min_mtu = 0; if (ds->ops->port_max_mtu) slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); - else - slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, -- cgit From b328ecc468f8f92433c9ad82675c0ce9f99b10cf Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 17 Jul 2020 10:35:32 +0200 Subject: xfrm: Make the policy hold queue work with VTI. We forgot to support the xfrm policy hold queue when VTI was implemented. This patch adds everything we need so that we can use the policy hold queue together with VTI interfaces. Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 6 +++++- net/ipv6/ip6_vti.c | 6 +++++- net/xfrm/xfrm_policy.c | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3e5d54517145..8b962eac9ed8 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -218,12 +218,15 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, } dst_hold(dst); - dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0); + dst = xfrm_lookup_route(tunnel->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } + if (dst->flags & DST_XFRM_QUEUE) + goto queued; + if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; dst_release(dst); @@ -255,6 +258,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } +queued: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 53f12b40528e..f5a4c4a6492b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -491,13 +491,16 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } dst_hold(dst); - dst = xfrm_lookup(t->net, dst, fl, NULL, 0); + dst = xfrm_lookup_route(t->net, dst, fl, NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto tx_err_link_failure; } + if (dst->flags & DST_XFRM_QUEUE) + goto queued; + x = dst->xfrm; if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; @@ -533,6 +536,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_dst_release; } +queued: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 564aa6492e7c..be150475b28b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2758,6 +2758,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; + __u32 skb_mark; spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); @@ -2767,7 +2768,12 @@ static void xfrm_policy_queue_process(struct timer_list *t) } dst = skb_dst(skb); sk = skb->sk; + + /* Fixup the mark to support VTI. */ + skb_mark = skb->mark; + skb->mark = pol->mark.v; xfrm_decode_session(skb, &fl, dst->ops->family); + skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); @@ -2799,7 +2805,12 @@ static void xfrm_policy_queue_process(struct timer_list *t) while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); + /* Fixup the mark to support VTI. */ + skb_mark = skb->mark; + skb->mark = pol->mark.v; xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + skb->mark = skb_mark; + dst_hold(xfrm_dst_path(skb_dst(skb))); dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) { -- cgit From bc4f0548f683a3d53359cef15f088d2d5bb4bc39 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:33:58 -0700 Subject: bpf: Compute bpf_skc_to_*() helper socket btf ids at build time Currently, socket types (struct tcp_sock, udp_sock, etc.) used by bpf_skc_to_*() helpers are computed when vmlinux_btf is first built in the kernel. Commit 5a2798ab32ba ("bpf: Add BTF_ID_LIST/BTF_ID/BTF_ID_UNUSED macros") implemented a mechanism to compute btf_ids at kernel build time which can simplify kernel implementation and reduce runtime overhead by removing in-kernel btf_id calculation. This patch did exactly this, removing in-kernel btf_id computation and utilizing build-time btf_id computation. If CONFIG_DEBUG_INFO_BTF is not defined, BTF_ID_LIST will define an array with size of 5, which is not enough for btf_sock_ids. So define its own static array if CONFIG_DEBUG_INFO_BTF is not defined. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163358.1393023-1-yhs@fb.com --- net/core/filter.c | 49 ++++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 2bd129b5ae74..5a65fb4b95ff 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9426,19 +9426,19 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) * sock_common as the first argument in its memory layout. */ #define BTF_SOCK_TYPE_xxx \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock") + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) enum { #define BTF_SOCK_TYPE(name, str) name, @@ -9447,26 +9447,13 @@ BTF_SOCK_TYPE_xxx MAX_BTF_SOCK_TYPE, }; -static int btf_sock_ids[MAX_BTF_SOCK_TYPE]; - -#ifdef CONFIG_BPF_SYSCALL -static const char *bpf_sock_types[] = { -#define BTF_SOCK_TYPE(name, str) str, +#ifdef CONFIG_DEBUG_INFO_BTF +BTF_ID_LIST(btf_sock_ids) +#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) BTF_SOCK_TYPE_xxx #undef BTF_SOCK_TYPE -}; - -void init_btf_sock_ids(struct btf *btf) -{ - int i, btf_id; - - for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) { - btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i], - BTF_KIND_STRUCT); - if (btf_id > 0) - btf_sock_ids[i] = btf_id; - } -} +#else +static u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; #endif static bool check_arg_btf_id(u32 btf_id, u32 arg) -- cgit From fce557bcef119a1bc5ab3cb02678cf454bcaf424 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:02 -0700 Subject: bpf: Make btf_sock_ids global tcp and udp bpf_iter can reuse some socket ids in btf_sock_ids, so make it global. I put the extern definition in btf_ids.h as a central place so it can be easily discovered by developers. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163402.1393427-1-yhs@fb.com --- net/core/filter.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 5a65fb4b95ff..654c346b7d91 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9421,39 +9421,13 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); } -/* Define a list of socket types which can be the argument for - * skc_to_*_sock() helpers. All these sockets should have - * sock_common as the first argument in its memory layout. - */ -#define BTF_SOCK_TYPE_xxx \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, inet_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, inet_connection_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, inet_request_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, inet_timewait_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, request_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, sock_common) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, tcp_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, tcp_request_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) - -enum { -#define BTF_SOCK_TYPE(name, str) name, -BTF_SOCK_TYPE_xxx -#undef BTF_SOCK_TYPE -MAX_BTF_SOCK_TYPE, -}; - #ifdef CONFIG_DEBUG_INFO_BTF -BTF_ID_LIST(btf_sock_ids) +BTF_ID_LIST_GLOBAL(btf_sock_ids) #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) BTF_SOCK_TYPE_xxx #undef BTF_SOCK_TYPE #else -static u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; +u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; #endif static bool check_arg_btf_id(u32 btf_id, u32 arg) -- cgit From 951cf368bcb11d6f817709660cf5cd914072c36f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 20 Jul 2020 09:34:03 -0700 Subject: bpf: net: Use precomputed btf_id for bpf iterators One additional field btf_id is added to struct bpf_ctx_arg_aux to store the precomputed btf_ids. The btf_id is computed at build time with BTF_ID_LIST or BTF_ID_LIST_GLOBAL macro definitions. All existing bpf iterators are changed to used pre-compute btf_ids. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200720163403.1393551-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 4 +++- net/ipv4/udp.c | 4 +++- net/ipv6/route.c | 7 ++++++- net/netlink/af_netlink.c | 7 ++++++- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 116c11a0aaed..a7f1b41482f8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include @@ -2954,7 +2955,7 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static const struct bpf_iter_reg tcp_reg_info = { +static struct bpf_iter_reg tcp_reg_info = { .target = "tcp", .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, @@ -2969,6 +2970,7 @@ static const struct bpf_iter_reg tcp_reg_info = { static void __init bpf_iter_register(void) { + tcp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON]; if (bpf_iter_reg_target(&tcp_reg_info)) pr_warn("Warning: could not register bpf iterator tcp\n"); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b738c63d7a77..b5231ab350e0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include #include "udp_impl.h" @@ -3232,7 +3233,7 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static const struct bpf_iter_reg udp_reg_info = { +static struct bpf_iter_reg udp_reg_info = { .target = "udp", .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, @@ -3247,6 +3248,7 @@ static const struct bpf_iter_reg udp_reg_info = { static void __init bpf_iter_register(void) { + udp_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UDP]; if (bpf_iter_reg_target(&udp_reg_info)) pr_warn("Warning: could not register bpf iterator udp\n"); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 427b81cbc164..33f5efbad0a9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include @@ -6423,7 +6424,10 @@ void __init ip6_route_init_special_entries(void) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) -static const struct bpf_iter_reg ipv6_route_reg_info = { +BTF_ID_LIST(btf_fib6_info_id) +BTF_ID(struct, fib6_info) + +static struct bpf_iter_reg ipv6_route_reg_info = { .target = "ipv6_route", .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, @@ -6438,6 +6442,7 @@ static const struct bpf_iter_reg ipv6_route_reg_info = { static int __init bpf_iter_register(void) { + ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id; return bpf_iter_reg_target(&ipv6_route_reg_info); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4f2c3b14ddbf..3cd58f0c2de4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -2803,7 +2804,10 @@ static const struct rhashtable_params netlink_rhashtable_params = { }; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) -static const struct bpf_iter_reg netlink_reg_info = { +BTF_ID_LIST(btf_netlink_sock_id) +BTF_ID(struct, netlink_sock) + +static struct bpf_iter_reg netlink_reg_info = { .target = "netlink", .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, @@ -2818,6 +2822,7 @@ static const struct bpf_iter_reg netlink_reg_info = { static int __init bpf_iter_register(void) { + netlink_reg_info.ctx_arg_info[0].btf_id = *btf_netlink_sock_id; return bpf_iter_reg_target(&netlink_reg_info); } #endif -- cgit From 6553e561cadc0ec966ce2f039965a99a7502e19b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 21 Jul 2020 19:53:51 +0300 Subject: devlink: Do not hold devlink mutex when initializing devlink fields There is no need to hold a device global lock when initializing devlink device fields of a devlink instance which is not yet part of the devices list. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 6335e1851088..7df918a5899e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7421,9 +7421,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc); */ int devlink_register(struct devlink *devlink, struct device *dev) { - mutex_lock(&devlink_mutex); devlink->dev = dev; devlink->registered = true; + mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); -- cgit From 9232a3e67b212f9ef924786f8dde23080acd321a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 21 Jul 2020 19:53:52 +0300 Subject: devlink: Avoid duplicate check for reload enabled flag Reload operation is enabled or not is already checked by devlink_reload(). Hence, remove the duplicate check from devlink_nl_cmd_reload(). Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 7df918a5899e..5c74e67f358c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2967,7 +2967,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) struct net *dest_net = NULL; int err; - if (!devlink_reload_supported(devlink) || !devlink->reload_enabled) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); -- cgit From eac5f8a95ae39dd94af818f0f9fad5d46207ee33 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 21 Jul 2020 19:53:54 +0300 Subject: devlink: Constify devlink instance pointer Constify devlink instance pointer while checking if reload operation is supported or not. This helps to review the scope of checks done in reload. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 5c74e67f358c..8b7bb4bfb6d0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2921,7 +2921,7 @@ static void devlink_reload_netns_change(struct devlink *devlink, DEVLINK_CMD_PARAM_NEW); } -static bool devlink_reload_supported(struct devlink *devlink) +static bool devlink_reload_supported(const struct devlink *devlink) { return devlink->ops->reload_down && devlink->ops->reload_up; } -- cgit From 35dfb013149f74c2be1ff9c78f14e6a3cd1539d1 Mon Sep 17 00:00:00 2001 From: Andrew Sy Kim Date: Wed, 8 Jul 2020 12:16:38 -0400 Subject: ipvs: queue delayed work to expire no destination connections if expire_nodest_conn=1 When expire_nodest_conn=1 and a destination is deleted, IPVS does not expire the existing connections until the next matching incoming packet. If there are many connection entries from a single client to a single destination, many packets may get dropped before all the connections are expired (more likely with lots of UDP traffic). An optimization can be made where upon deletion of a destination, IPVS queues up delayed work to immediately expire any connections with a deleted destination. This ensures any reused source ports from a client (within the IPVS timeouts) are scheduled to new real servers instead of silently dropped. Signed-off-by: Andrew Sy Kim Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 39 ++++++++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_core.c | 47 ++++++++++++++++++----------------------- net/netfilter/ipvs/ip_vs_ctl.c | 22 +++++++++++++++++++ 3 files changed, 81 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b3921ae92740..a90b8eac16ac 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1389,6 +1389,45 @@ flush_again: goto flush_again; } } + +#ifdef CONFIG_SYSCTL +void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) +{ + int idx; + struct ip_vs_conn *cp, *cp_c; + struct ip_vs_dest *dest; + + rcu_read_lock(); + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + if (cp->ipvs != ipvs) + continue; + + dest = cp->dest; + if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE)) + continue; + + if (atomic_read(&cp->n_control)) + continue; + + cp_c = cp->control; + IP_VS_DBG(4, "del connection\n"); + ip_vs_conn_del(cp); + if (cp_c && !atomic_read(&cp_c->n_control)) { + IP_VS_DBG(4, "del controlling connection\n"); + ip_vs_conn_del(cp_c); + } + } + cond_resched_rcu(); + + /* netns clean up started, abort delayed work */ + if (!ipvs->enable) + break; + } + rcu_read_unlock(); +} +#endif + /* * per netns init and exit */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4a6b7662f3f..e3668a6e54e4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -694,16 +694,10 @@ static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) return ipvs->sysctl_nat_icmp_send; } -static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) -{ - return ipvs->sysctl_expire_nodest_conn; -} - #else static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } -static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -2097,36 +2091,35 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int } } - if (unlikely(!cp)) { - int v; - - if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) - return v; - } - - IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); - /* Check the server status */ - if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { + if (cp && cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ + if (sysctl_expire_nodest_conn(ipvs)) { + bool old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); - __u32 flags = cp->flags; - - /* when timer already started, silently drop the packet.*/ - if (timer_pending(&cp->timer)) - __ip_vs_conn_put(cp); - else - ip_vs_conn_put(cp); + if (!old_ct) + cp->flags &= ~IP_VS_CONN_F_NFCT; - if (sysctl_expire_nodest_conn(ipvs) && - !(flags & IP_VS_CONN_F_ONE_PACKET)) { - /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); + __ip_vs_conn_put(cp); + if (old_ct) + return NF_DROP; + cp = NULL; + } else { + __ip_vs_conn_put(cp); + return NF_DROP; } + } - return NF_DROP; + if (unlikely(!cp)) { + int v; + + if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) + return v; } + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); + ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4af83f466dfc..f984d2c881ff 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -210,6 +210,17 @@ static void update_defense_level(struct netns_ipvs *ipvs) local_bh_enable(); } +/* Handler for delayed work for expiring no + * destination connections + */ +static void expire_nodest_conn_handler(struct work_struct *work) +{ + struct netns_ipvs *ipvs; + + ipvs = container_of(work, struct netns_ipvs, + expire_nodest_conn_work.work); + ip_vs_expire_nodest_conn_flush(ipvs); +} /* * Timer for checking the defense @@ -1164,6 +1175,12 @@ static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, list_add(&dest->t_list, &ipvs->dest_trash); dest->idle_start = 0; spin_unlock_bh(&ipvs->dest_trash_lock); + + /* Queue up delayed work to expire all no destination connections. + * No-op when CONFIG_SYSCTL is disabled. + */ + if (!cleanup) + ip_vs_enqueue_expire_nodest_conns(ipvs); } @@ -4086,6 +4103,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) queue_delayed_work(system_long_wq, &ipvs->defense_work, DEFENSE_TIMER_PERIOD); + /* Init delayed work for expiring no dest conn */ + INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, + expire_nodest_conn_handler); + return 0; } @@ -4093,6 +4114,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { struct net *net = ipvs->net; + cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); -- cgit From 954d82979b2f9dd4c20b895226799650d4841b94 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Jul 2020 15:09:39 -0500 Subject: netfilter: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 2 +- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/nf_conntrack_h323_asn1.c | 6 +++--- net/netfilter/nf_conntrack_proto.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 2 +- net/netfilter/nf_nat_core.c | 12 ++++++------ net/netfilter/nf_synproxy_core.c | 6 ++---- net/netfilter/nf_tables_api.c | 8 ++++---- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 2 +- net/netfilter/nft_cmp.c | 4 ++-- net/netfilter/nft_ct.c | 6 +++--- net/netfilter/nft_fib.c | 2 +- net/netfilter/nft_payload.c | 2 +- net/netfilter/utils.c | 8 ++++---- net/netfilter/x_tables.c | 2 +- 17 files changed, 34 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c83ffe912163..38e946fdd041 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1935,7 +1935,7 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, size_kern = match_size; module_put(match->me); break; - case EBT_COMPAT_WATCHER: /* fallthrough */ + case EBT_COMPAT_WATCHER: case EBT_COMPAT_TARGET: wt = xt_request_find_target(NFPROTO_BRIDGE, name, mwt->u.revision); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 56621d6bfd29..920b7c4331f0 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1644,7 +1644,7 @@ dump_last: goto next_set; if (set->variant->uref) set->variant->uref(set, cb, true); - /* fall through */ + fallthrough; default: ret = set->variant->list(set, skb, cb); if (!cb->args[IPSET_CB_ARG0]) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 573cb4481481..e697a824b001 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -257,15 +257,15 @@ static unsigned int get_uint(struct bitstr *bs, int b) case 4: v |= *bs->cur++; v <<= 8; - /* fall through */ + fallthrough; case 3: v |= *bs->cur++; v <<= 8; - /* fall through */ + fallthrough; case 2: v |= *bs->cur++; v <<= 8; - /* fall through */ + fallthrough; case 1: v |= *bs->cur++; break; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a0560d175a7f..95f79980348c 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -610,7 +610,7 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto) switch (nfproto) { case NFPROTO_BRIDGE: nf_ct_netns_do_put(net, NFPROTO_BRIDGE); - /* fall through */ + fallthrough; case NFPROTO_INET: nf_ct_netns_do_put(net, NFPROTO_IPV4); nf_ct_netns_do_put(net, NFPROTO_IPV6); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1926fd56df56..6892e497781c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -900,7 +900,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, return -NF_REPEAT; return NF_DROP; } - /* Fall through */ + fallthrough; case TCP_CONNTRACK_IGNORE: /* Ignored packets: * diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6a26299cb064..a604f43e3e6b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -60,7 +60,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.tcp.port), ntohs(tuple->dst.u.tcp.port)); break; - case IPPROTO_UDPLITE: /* fallthrough */ + case IPPROTO_UDPLITE: case IPPROTO_UDP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.udp.port), diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index bfc555fcbc72..ea923f8cf9c4 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -408,7 +408,7 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, static const unsigned int max_attempts = 128; switch (tuple->dst.protonum) { - case IPPROTO_ICMP: /* fallthrough */ + case IPPROTO_ICMP: case IPPROTO_ICMPV6: /* id is same for either direction... */ keyptr = &tuple->src.u.icmp.id; @@ -442,11 +442,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, } goto find_free_id; #endif - case IPPROTO_UDP: /* fallthrough */ - case IPPROTO_UDPLITE: /* fallthrough */ - case IPPROTO_TCP: /* fallthrough */ - case IPPROTO_SCTP: /* fallthrough */ - case IPPROTO_DCCP: /* fallthrough */ + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: if (maniptype == NF_NAT_MANIP_SRC) keyptr = &tuple->src.u.all; else diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index ebcdc8e54476..9cca35d22927 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -704,8 +704,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb, nf_ct_seqadj_init(ct, ctinfo, 0); synproxy->tsoff = 0; this_cpu_inc(snet->stats->conn_reopened); - - /* fall through */ + fallthrough; case TCP_CONNTRACK_SYN_SENT: if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; @@ -1128,8 +1127,7 @@ ipv6_synproxy_hook(void *priv, struct sk_buff *skb, nf_ct_seqadj_init(ct, ctinfo, 0); synproxy->tsoff = 0; this_cpu_inc(snet->stats->conn_reopened); - - /* fall through */ + fallthrough; case TCP_CONNTRACK_SYN_SENT: if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6708a4f2eec8..0d96e4eb754d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4375,7 +4375,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: set->use--; - /* fall through */ + fallthrough; default: nf_tables_unbind_set(ctx, set, binding, phase == NFT_TRANS_COMMIT); @@ -6256,7 +6256,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: flowtable->use--; - /* fall through */ + fallthrough; default: return; } @@ -7262,7 +7262,7 @@ static int nf_tables_validate(struct net *net) break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); - /* fall through */ + fallthrough; case NFT_VALIDATE_DO: list_for_each_entry(table, &net->nft.tables, list) { if (nft_table_validate(net, table) < 0) @@ -8336,7 +8336,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, default: return -EINVAL; } - /* fall through */ + fallthrough; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 96c74c4c7176..587897a2498b 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -213,7 +213,7 @@ next_rule: jumpstack[stackptr].chain = chain; jumpstack[stackptr].rules = rules + 1; stackptr++; - /* fall through */ + fallthrough; case NFT_GOTO: nft_trace_packet(&info, chain, rule, NFT_TRACETYPE_RULE); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index da915c224a82..89a381f7f945 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -451,7 +451,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, case IPPROTO_TCP: timeouts = nf_tcp_pernet(net)->timeouts; break; - case IPPROTO_UDP: /* fallthrough */ + case IPPROTO_UDP: case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(net)->timeouts; break; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 8a28c127effc..16f4d84599ac 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -43,7 +43,7 @@ void nft_cmp_eval(const struct nft_expr *expr, case NFT_CMP_LT: if (d == 0) goto mismatch; - /* fall through */ + fallthrough; case NFT_CMP_LTE: if (d > 0) goto mismatch; @@ -51,7 +51,7 @@ void nft_cmp_eval(const struct nft_expr *expr, case NFT_CMP_GT: if (d == 0) goto mismatch; - /* fall through */ + fallthrough; case NFT_CMP_GTE: if (d < 0) goto mismatch; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 77258af1fce0..322bd674963e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -129,7 +129,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } #endif - case NFT_CT_BYTES: /* fallthrough */ + case NFT_CT_BYTES: case NFT_CT_PKTS: { const struct nf_conn_acct *acct = nf_conn_acct_find(ct); u64 count = 0; @@ -1013,8 +1013,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, help6 = nf_conntrack_helper_try_module_get(name, family, priv->l4proto); break; - case NFPROTO_NETDEV: /* fallthrough */ - case NFPROTO_BRIDGE: /* same */ + case NFPROTO_NETDEV: + case NFPROTO_BRIDGE: case NFPROTO_INET: help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4, priv->l4proto); diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index cfac0964f48d..4dfdaeaf09a5 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -32,7 +32,7 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, unsigned int hooks; switch (priv->result) { - case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: hooks = (1 << NF_INET_PRE_ROUTING); break; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index a7de3a58f553..ed7cb9f747f6 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -467,7 +467,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, case IPPROTO_UDP: if (!nft_payload_udp_checksum(skb, pkt->xt.thoff)) return -1; - /* Fall through. */ + fallthrough; case IPPROTO_UDPLITE: *l4csum_offset = offsetof(struct udphdr, check); break; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 51b454d8fa9c..cedf47ab3c6f 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -25,7 +25,7 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - /* fall through */ + fallthrough; case CHECKSUM_NONE: if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP) skb->csum = 0; @@ -51,7 +51,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, case CHECKSUM_COMPLETE: if (len == skb->len - dataoff) return nf_ip_checksum(skb, hook, dataoff, protocol); - /* fall through */ + fallthrough; case CHECKSUM_NONE: skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, skb->len - dataoff, 0); @@ -79,7 +79,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - /* fall through */ + fallthrough; case CHECKSUM_NONE: skb->csum = ~csum_unfold( csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -106,7 +106,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, case CHECKSUM_COMPLETE: if (len == skb->len - dataoff) return nf_ip6_checksum(skb, hook, dataoff, protocol); - /* fall through */ + fallthrough; case CHECKSUM_NONE: hsum = skb_checksum(skb, 0, dataoff, 0); skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 99a468be4a59..8b2daccaf8df 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1571,7 +1571,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, trav->curr = trav->curr->next; if (trav->curr != trav->head) break; - /* fall through */ + fallthrough; default: return NULL; } -- cgit From c1d069e3bfc9e4021f087016578fbff209f493fd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 21 Jul 2020 21:08:54 +0200 Subject: mptcp: move helper to where its used Only used in token.c. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 11 ----------- net/mptcp/token.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e5baaef5ec89..6e114c09e5b4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -396,17 +396,6 @@ struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, void mptcp_token_destroy(struct mptcp_sock *msk); void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn); -static inline void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) -{ - /* we might consider a faster version that computes the key as a - * hash of some information available in the MPTCP socket. Use - * random data at the moment, as it's probably the safest option - * in case multiple sockets are opened in different namespaces at - * the same time. - */ - get_random_bytes(key, sizeof(u64)); - mptcp_crypto_key_sha(*key, token, idsn); -} void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 7d8106026081..b25b390dbbff 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -83,6 +83,18 @@ static bool __token_bucket_busy(struct token_bucket *t, u32 token) __token_lookup_req(t, token) || __token_lookup_msk(t, token); } +static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) +{ + /* we might consider a faster version that computes the key as a + * hash of some information available in the MPTCP socket. Use + * random data at the moment, as it's probably the safest option + * in case multiple sockets are opened in different namespaces at + * the same time. + */ + get_random_bytes(key, sizeof(u64)); + mptcp_crypto_key_sha(*key, token, idsn); +} + /** * mptcp_token_new_request - create new key/idsn/token for subflow_request * @req: the request socket -- cgit From a6c0d0934f0d8d4e32a3e3bca4b503a3e7237470 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Jul 2020 09:40:27 +0200 Subject: net: explicitly include in net/core/sock.c The buildbot found a config where the header isn't already implicitly pulled in, so add an explicit include as well. Fixes: 8c918ffbbad4 ("net: remove compat_sock_common_{get,set}sockopt") Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index d828bfe1c47d..6da54eac2b34 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -113,6 +113,7 @@ #include #include #include +#include #include -- cgit From 85e05d263ed2d50e3be86c73175effe780ec1d9a Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Mon, 20 Jul 2020 14:49:39 +0200 Subject: net: dsa: of: Allow ethernet-ports as encapsulating node Due to unified Ethernet Switch Device Tree Bindings allow for ethernet-ports as encapsulating node as well. Signed-off-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index e055efff390b..c0ffc7a2b65f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -727,8 +727,12 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds, ports = of_get_child_by_name(dn, "ports"); if (!ports) { - dev_err(ds->dev, "no ports child node found\n"); - return -EINVAL; + /* The second possibility is "ethernet-ports" */ + ports = of_get_child_by_name(dn, "ethernet-ports"); + if (!ports) { + dev_err(ds->dev, "no ports child node found\n"); + return -EINVAL; + } } for_each_available_child_of_node(ports, port) { -- cgit From 749c08f8206cdf5cad15d557912898ce22aa55da Mon Sep 17 00:00:00 2001 From: Richard Sailer Date: Mon, 20 Jul 2020 18:06:14 +0200 Subject: net: dccp: Add SIOCOUTQ IOCTL support (send buffer fill) This adds support for the SIOCOUTQ IOCTL to get the send buffer fill of a DCCP socket, like UDP and TCP sockets already have. Regarding the used data field: DCCP uses per packet sequence numbers, not per byte, so sequence numbers can't be used like in TCP. sk_wmem_queued is not used by DCCP and always 0, even in test on highly congested paths. Therefore this uses sk_wmem_alloc like in UDP. Signed-off-by: Richard Sailer Signed-off-by: David S. Miller --- net/dccp/proto.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index fd92d3fe321f..9e453611107f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -375,6 +375,15 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) goto out; switch (cmd) { + case SIOCOUTQ: { + int amount = sk_wmem_alloc_get(sk); + /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and + * always 0, comparably to UDP. + */ + + rc = put_user(amount, (int __user *)arg); + } + break; case SIOCINQ: { struct sk_buff *skb; unsigned long amount = 0; -- cgit From 6ab301c98f174a8c25d5351b977a1113e2f1fb91 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Jul 2020 17:20:50 +0200 Subject: mptcp: zero token hash at creation time. Otherwise the 'chain_len' filed will carry random values, some token creation calls will fail due to excessive chain length, causing unexpected fallback to TCP. Fixes: 2c5ebd001d4f ("mptcp: refactor token container") Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/token.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/token.c b/net/mptcp/token.c index b25b390dbbff..97cfc45bcc4f 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -368,7 +368,7 @@ void __init mptcp_token_init(void) sizeof(struct token_bucket), 0, 20,/* one slot per 1MB of memory */ - 0, + HASH_ZERO, NULL, &token_mask, 0, -- cgit From 637989b5d77e954007aecaf6cadc7badc6ab94fb Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 22 Jul 2020 18:57:11 +0300 Subject: devlink: Always use user_ptr[0] for devlink and simplify post_doit Currently devlink instance is searched on all doit() operations. But it is optionally stored into user_ptr[0]. This requires rediscovering devlink again doing post_doit(). Few devlink commands related to port shared buffers needs 3 pointers (devlink, devlink_port, and devlink_sb) while executing doit commands. Though devlink pointer can be derived from the devlink_port during post_doit() operation when doit() callback has acquired devlink instance lock, relying on such scheme to access devlik pointer makes code very fragile. Hence, to avoid ambiguity in post_doit() and to avoid searching devlink instance again, simplify code by always storing devlink instance in user_ptr[0] and derive devlink_sb pointer in their respective callback routines. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 164 +++++++++++++++++++++++------------------------------ 1 file changed, 70 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 8b7bb4bfb6d0..0ca89196a367 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -386,16 +386,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) -#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) -#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(2) -#define DEVLINK_NL_FLAG_NEED_SB BIT(3) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(2) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -412,31 +410,19 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_lock(&devlink->lock); - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { - info->user_ptr[0] = devlink; - } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { + info->user_ptr[0] = devlink; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { devlink_port = devlink_port_get_from_info(devlink, info); if (IS_ERR(devlink_port)) { err = PTR_ERR(devlink_port); goto unlock; } - info->user_ptr[0] = devlink_port; + info->user_ptr[1] = devlink_port; } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { - info->user_ptr[0] = devlink; devlink_port = devlink_port_get_from_info(devlink, info); if (!IS_ERR(devlink_port)) info->user_ptr[1] = devlink_port; } - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { - struct devlink_sb *devlink_sb; - - devlink_sb = devlink_sb_get_from_info(devlink, info); - if (IS_ERR(devlink_sb)) { - err = PTR_ERR(devlink_sb); - goto unlock; - } - info->user_ptr[1] = devlink_sb; - } return 0; unlock: @@ -451,16 +437,8 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, { struct devlink *devlink; - /* When devlink changes netns, it would not be found - * by devlink_get_from_info(). So try if it is stored first. - */ - if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { - devlink = info->user_ptr[0]; - } else { - devlink = devlink_get_from_info(info); - WARN_ON(IS_ERR(devlink)); - } - if (!IS_ERR(devlink) && ~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) + devlink = info->user_ptr[0]; + if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); } @@ -759,7 +737,7 @@ out: static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -906,7 +884,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; int err; @@ -1041,10 +1019,14 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -1146,11 +1128,15 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1255,12 +1241,16 @@ static int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; enum devlink_sb_threshold_type threshold_type; + struct devlink_sb *devlink_sb; u16 pool_index; u32 size; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1339,13 +1329,17 @@ nla_put_failure: static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; u16 pool_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1455,12 +1449,17 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, static int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_sb *devlink_sb; u16 pool_index; u32 threshold; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_index_get_from_info(devlink_sb, info, &pool_index); if (err) @@ -1542,14 +1541,18 @@ nla_put_failure: static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = devlink_port->devlink; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_sb *devlink_sb; struct sk_buff *msg; enum devlink_sb_pool_type pool_type; u16 tc_index; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; @@ -1689,14 +1692,19 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, static int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_pool_type pool_type; + struct devlink_sb *devlink_sb; u16 tc_index; u16 pool_index; u32 threshold; int err; + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); + err = devlink_sb_pool_type_get_from_info(info, &pool_type); if (err) return err; @@ -1724,8 +1732,12 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); if (ops->sb_occ_snapshot) return ops->sb_occ_snapshot(devlink, devlink_sb->index); @@ -1736,8 +1748,12 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; + struct devlink_sb *devlink_sb; + + devlink_sb = devlink_sb_get_from_info(devlink, info); + if (IS_ERR(devlink_sb)) + return PTR_ERR(devlink_sb); if (ops->sb_occ_max_clear) return ops->sb_occ_max_clear(devlink, devlink_sb->index); @@ -7018,7 +7034,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -7041,24 +7056,20 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_split_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_port_unsplit_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { @@ -7066,8 +7077,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ }, { @@ -7075,16 +7084,13 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_pool_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { @@ -7092,16 +7098,14 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_port_pool_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { @@ -7109,60 +7113,50 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | - DEVLINK_NL_FLAG_NEED_SB, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NEED_SB, }, { .cmd = DEVLINK_CMD_ESWITCH_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_entries_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_headers_get, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -7170,20 +7164,17 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_dpipe_table_counters_set, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_set, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_resource_dump, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -7191,15 +7182,13 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_reload, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -7207,7 +7196,6 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_param_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, @@ -7230,21 +7218,18 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_NEW, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_new, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_region_del, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, @@ -7252,14 +7237,12 @@ static const struct genl_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_region_read_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_INFO_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { @@ -7317,46 +7300,39 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_flash_update, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_GET, .doit = devlink_nl_cmd_trap_get_doit, .dumpit = devlink_nl_cmd_trap_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_SET, .doit = devlink_nl_cmd_trap_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .doit = devlink_nl_cmd_trap_group_get_doit, .dumpit = devlink_nl_cmd_trap_group_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_GROUP_SET, .doit = devlink_nl_cmd_trap_group_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .doit = devlink_nl_cmd_trap_policer_get_doit, .dumpit = devlink_nl_cmd_trap_policer_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_TRAP_POLICER_SET, .doit = devlink_nl_cmd_trap_policer_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, }; -- cgit From b71a61ccfebb4ff733d2d9fc66cd5c75b7ae46a2 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:05 +0100 Subject: l2tp: cleanup whitespace use Fix up various whitespace issues as reported by checkpatch.pl: * remove spaces around operators where appropriate, * add missing blank lines following declarations, * remove multiple blank lines, or trailing blank lines at the end of functions. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 38 +++++++++++++++++++------------------- net/l2tp/l2tp_debugfs.c | 2 -- net/l2tp/l2tp_eth.c | 16 ++++++---------- net/l2tp/l2tp_ip.c | 14 ++++++++------ net/l2tp/l2tp_ip6.c | 14 +++++++------- net/l2tp/l2tp_netlink.c | 3 ++- net/l2tp/l2tp_ppp.c | 9 ++++++--- 7 files changed, 48 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6434d17e6e8e..0992d5fab9b8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -94,7 +94,7 @@ struct l2tp_skb_cb { unsigned long expires; }; -#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) +#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&skb->cb[sizeof(struct inet_skb_parm)]) static struct workqueue_struct *l2tp_wq; @@ -134,7 +134,6 @@ static inline struct hlist_head * l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id) { return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)]; - } /* Session hash list. @@ -648,9 +647,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, L2TP_SKB_CB(skb)->has_seq = 0; if (tunnel->version == L2TP_HDR_VER_2) { if (hdrflags & L2TP_HDRFLAG_S) { - ns = ntohs(*(__be16 *) ptr); + ns = ntohs(*(__be16 *)ptr); ptr += 2; - nr = ntohs(*(__be16 *) ptr); + nr = ntohs(*(__be16 *)ptr); ptr += 2; /* Store L2TP info in the skb */ @@ -662,7 +661,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, session->name, ns, nr, session->nr); } } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { - u32 l2h = ntohl(*(__be32 *) ptr); + u32 l2h = ntohl(*(__be32 *)ptr); if (l2h & 0x40000000) { ns = l2h & 0x00ffffff; @@ -777,6 +776,7 @@ EXPORT_SYMBOL(l2tp_recv_common); static int l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; + BUG_ON(!session); BUG_ON(session->magic != L2TP_SESSION_MAGIC); while ((skb = skb_dequeue(&session->reorder_q))) { @@ -828,7 +828,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) optr = ptr = skb->data; /* Get L2TP header flags */ - hdrflags = ntohs(*(__be16 *) ptr); + hdrflags = ntohs(*(__be16 *)ptr); /* Check protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; @@ -859,14 +859,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) ptr += 2; /* Extract tunnel and session ID */ - tunnel_id = ntohs(*(__be16 *) ptr); + tunnel_id = ntohs(*(__be16 *)ptr); ptr += 2; - session_id = ntohs(*(__be16 *) ptr); + session_id = ntohs(*(__be16 *)ptr); ptr += 2; } else { ptr += 2; /* skip reserved bits */ tunnel_id = tunnel->tunnel_id; - session_id = ntohl(*(__be32 *) ptr); + session_id = ntohl(*(__be32 *)ptr); ptr += 4; } @@ -971,13 +971,13 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) */ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { u16 flags = L2TP_HDR_VER_3; - *((__be16 *) bufp) = htons(flags); + *((__be16 *)bufp) = htons(flags); bufp += 2; - *((__be16 *) bufp) = 0; + *((__be16 *)bufp) = 0; bufp += 2; } - *((__be32 *) bufp) = htonl(session->peer_session_id); + *((__be32 *)bufp) = htonl(session->peer_session_id); bufp += 4; if (session->cookie_len) { memcpy(bufp, &session->cookie[0], session->cookie_len); @@ -1305,9 +1305,9 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = - ! cfg->udp6_zero_tx_checksums; + !cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = - ! cfg->udp6_zero_rx_checksums; + !cfg->udp6_zero_rx_checksums; } else #endif { @@ -1340,7 +1340,7 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *) &ip6_addr, + err = kernel_bind(sock, (struct sockaddr *)&ip6_addr, sizeof(ip6_addr)); if (err < 0) goto out; @@ -1350,7 +1350,7 @@ static int l2tp_tunnel_sock_create(struct net *net, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = peer_tunnel_id; err = kernel_connect(sock, - (struct sockaddr *) &ip6_addr, + (struct sockaddr *)&ip6_addr, sizeof(ip6_addr), 0); if (err < 0) goto out; @@ -1367,7 +1367,7 @@ static int l2tp_tunnel_sock_create(struct net *net, ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; - err = kernel_bind(sock, (struct sockaddr *) &ip_addr, + err = kernel_bind(sock, (struct sockaddr *)&ip_addr, sizeof(ip_addr)); if (err < 0) goto out; @@ -1375,7 +1375,7 @@ static int l2tp_tunnel_sock_create(struct net *net, ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->peer_ip; ip_addr.l2tp_conn_id = peer_tunnel_id; - err = kernel_connect(sock, (struct sockaddr *) &ip_addr, + err = kernel_connect(sock, (struct sockaddr *)&ip_addr, sizeof(ip_addr), 0); if (err < 0) goto out; @@ -1593,6 +1593,7 @@ void __l2tp_session_unhash(struct l2tp_session *session) /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); @@ -1636,7 +1637,6 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } - } EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 35bb4f3bdbe0..f0301cb41ae0 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -63,7 +63,6 @@ static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) pd->session_idx = 0; l2tp_dfs_next_tunnel(pd); } - } static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs) @@ -90,7 +89,6 @@ out: return pd; } - static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 3099efa19249..a84627d7be27 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -51,7 +51,6 @@ struct l2tp_eth_sess { struct net_device __rcu *dev; }; - static int l2tp_eth_dev_init(struct net_device *dev) { eth_hw_addr_random(dev); @@ -94,13 +93,12 @@ static void l2tp_eth_get_stats64(struct net_device *dev, { struct l2tp_eth *priv = netdev_priv(dev); - stats->tx_bytes = (unsigned long) atomic_long_read(&priv->tx_bytes); - stats->tx_packets = (unsigned long) atomic_long_read(&priv->tx_packets); - stats->tx_dropped = (unsigned long) atomic_long_read(&priv->tx_dropped); - stats->rx_bytes = (unsigned long) atomic_long_read(&priv->rx_bytes); - stats->rx_packets = (unsigned long) atomic_long_read(&priv->rx_packets); - stats->rx_errors = (unsigned long) atomic_long_read(&priv->rx_errors); - + stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes); + stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets); + stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped); + stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes); + stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets); + stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors); } static const struct net_device_ops l2tp_eth_netdev_ops = { @@ -348,13 +346,11 @@ err: return rc; } - static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = { .session_create = l2tp_eth_create, .session_delete = l2tp_session_delete, }; - static int __init l2tp_eth_init(void) { int err = 0; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 2a3fd31fb589..e5b63eab887d 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -126,7 +126,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) /* Point to L2TP header */ optr = ptr = skb->data; - session_id = ntohl(*((__be32 *) ptr)); + session_id = ntohl(*((__be32 *)ptr)); ptr += 4; /* RFC3931: L2TP/IP packets have the first 4 bytes containing @@ -176,7 +176,7 @@ pass_up: if ((skb->data[0] & 0xc0) != 0xc0) goto discard; - tunnel_id = ntohl(*(__be32 *) &skb->data[4]); + tunnel_id = ntohl(*(__be32 *)&skb->data[4]); iph = (struct iphdr *)skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); @@ -260,7 +260,7 @@ static void l2tp_ip_destroy_sock(struct sock *sk) static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); - struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr; + struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr; struct net *net = sock_net(sk); int ret; int chk_addr_ret; @@ -316,7 +316,7 @@ out: static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; + struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr; int rc; if (addr_len < sizeof(*lsa)) @@ -375,6 +375,7 @@ static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, lsa->l2tp_addr.s_addr = inet->inet_daddr; } else { __be32 addr = inet->inet_rcv_saddr; + if (!addr) addr = inet->inet_saddr; lsa->l2tp_conn_id = lsk->conn_id; @@ -422,6 +423,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Get and verify the address. */ if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_l2tpip *, lip, msg->msg_name); + rc = -EINVAL; if (msg->msg_namelen < sizeof(*lip)) goto out; @@ -456,7 +458,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) skb_reset_transport_header(skb); /* Insert 0 session_id */ - *((__be32 *) skb_put(skb, 4)) = 0; + *((__be32 *)skb_put(skb, 4)) = 0; /* Copy user data into skb */ rc = memcpy_from_msg(skb_put(skb, len), msg, len); @@ -467,7 +469,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &inet->cork.fl.u.ip4; if (connected) - rt = (struct rtable *) __sk_dst_check(sk, 0); + rt = (struct rtable *)__sk_dst_check(sk, 0); rcu_read_lock(); if (rt == NULL) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 4799bec87b33..d17b9fe1180f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -138,7 +138,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) /* Point to L2TP header */ optr = ptr = skb->data; - session_id = ntohl(*((__be32 *) ptr)); + session_id = ntohl(*((__be32 *)ptr)); ptr += 4; /* RFC3931: L2TP/IP packets have the first 4 bytes containing @@ -188,7 +188,7 @@ pass_up: if ((skb->data[0] & 0xc0) != 0xc0) goto discard; - tunnel_id = ntohl(*(__be32 *) &skb->data[4]); + tunnel_id = ntohl(*(__be32 *)&skb->data[4]); iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); @@ -276,7 +276,7 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *) uaddr; + struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr; struct net *net = sock_net(sk); __be32 v4addr = 0; int bound_dev_if; @@ -375,8 +375,8 @@ out_unlock: static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *) uaddr; - struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct sockaddr_l2tpip6 *lsa = (struct sockaddr_l2tpip6 *)uaddr; + struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; struct in6_addr *daddr; int addr_type; int rc; @@ -548,7 +548,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) daddr = &lsa->l2tp_addr; if (np->sndflow) { fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; - if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; @@ -594,7 +594,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (IS_ERR(flowlabel)) return -EINVAL; } - if (!(opt->opt_nflen|opt->opt_flen)) + if (!(opt->opt_nflen | opt->opt_flen)) opt = NULL; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index ebb381c3f1b9..0ce8e94ace78 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -27,7 +27,6 @@ #include "l2tp_core.h" - static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { @@ -570,6 +569,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); + if (len > 8) { ret = -EINVAL; goto out_tunnel; @@ -579,6 +579,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } if (info->attrs[L2TP_ATTR_PEER_COOKIE]) { u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]); + if (len > 8) { ret = -EINVAL; goto out_tunnel; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index c54cb59593ef..6dccffa29d02 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -351,7 +351,7 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - struct sock *sk = (struct sock *) chan->private; + struct sock *sk = (struct sock *)chan->private; struct l2tp_session *session; struct l2tp_tunnel *tunnel; int uhlen, headroom; @@ -928,6 +928,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, inet = inet_sk(tunnel->sock); if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) { struct sockaddr_pppol2tp sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -984,6 +985,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, #endif } else if (tunnel->version == 3) { struct sockaddr_pppol2tpv3 sp; + len = sizeof(sp); memset(&sp, 0, len); sp.sa_family = AF_PPPOX; @@ -1343,7 +1345,7 @@ static int pppol2tp_session_getsockopt(struct sock *sk, break; case PPPOL2TP_SO_REORDERTO: - *val = (int) jiffies_to_msecs(session->reorder_timeout); + *val = (int)jiffies_to_msecs(session->reorder_timeout); l2tp_info(session, L2TP_MSG_CONTROL, "%s: get reorder_timeout=%d\n", session->name, *val); break; @@ -1407,7 +1409,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, if (put_user(len, optlen)) goto end_put_sess; - if (copy_to_user((void __user *) optval, &val, len)) + if (copy_to_user((void __user *)optval, &val, len)) goto end_put_sess; err = 0; @@ -1551,6 +1553,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) if (tunnel->sock) { struct inet_sock *inet = inet_sk(tunnel->sock); + ip = ntohl(inet->inet_saddr); port = ntohs(inet->inet_sport); } -- cgit From 20dcb1107ab1a3423c72a88269b9775cc549262a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:06 +0100 Subject: l2tp: cleanup comments Modify some l2tp comments to better adhere to kernel coding style, as reported by checkpatch.pl. Add descriptive comments for the l2tp per-net spinlocks to document their use. Fix an incorrect comment in l2tp_recv_common: RFC2661 section 5.4 states that: "The LNS controls enabling and disabling of sequence numbers by sending a data message with or without sequence numbers present at any time during the life of a session." l2tp handles this correctly in l2tp_recv_common, but the comment around the code was incorrect and confusing. Fix up the comment accordingly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 +++--- net/l2tp/l2tp_core.h | 76 ++++++++++++++++++++----------------------------- net/l2tp/l2tp_debugfs.c | 3 +- net/l2tp/l2tp_eth.c | 3 +- net/l2tp/l2tp_ip.c | 3 +- net/l2tp/l2tp_ip6.c | 15 ++++------ net/l2tp/l2tp_netlink.c | 3 +- net/l2tp/l2tp_ppp.c | 3 +- 8 files changed, 47 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0992d5fab9b8..f5e314d8a02b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * L2TP core. +/* L2TP core. * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd * @@ -102,8 +101,10 @@ static struct workqueue_struct *l2tp_wq; static unsigned int l2tp_net_id; struct l2tp_net { struct list_head l2tp_tunnel_list; + /* Lock for write access to l2tp_tunnel_list */ spinlock_t l2tp_tunnel_list_lock; struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; + /* Lock for write access to l2tp_session_hlist */ spinlock_t l2tp_session_hlist_lock; }; @@ -678,7 +679,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, } if (L2TP_SKB_CB(skb)->has_seq) { - /* Received a packet with sequence numbers. If we're the LNS, + /* Received a packet with sequence numbers. If we're the LAC, * check if we sre sending sequence numbers and if not, * configure it so. */ @@ -1604,7 +1605,7 @@ void __l2tp_session_unhash(struct l2tp_session *session) EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by - pseudowire modules. + * pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 10cf7c3dcbb3..3ebb701eebbf 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * L2TP internal definitions. +/* L2TP internal definitions. * * Copyright (c) 2008,2009 Katalix Systems Ltd */ @@ -49,32 +48,26 @@ struct l2tp_tunnel; */ struct l2tp_session_cfg { enum l2tp_pwtype pw_type; - unsigned int recv_seq:1; /* expect receive packets with - * sequence numbers? */ - unsigned int send_seq:1; /* send packets with sequence - * numbers? */ - unsigned int lns_mode:1; /* behave as LNS? LAC enables - * sequence numbers under - * control of LNS. */ - int debug; /* bitmask of debug message - * categories */ + unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ + unsigned int send_seq:1; /* send packets with sequence numbers? */ + unsigned int lns_mode:1; /* behave as LNS? + * LAC enables sequence numbers under LNS control. + */ + int debug; /* bitmask of debug message categories */ u16 l2specific_type; /* Layer 2 specific type */ u8 cookie[8]; /* optional cookie */ int cookie_len; /* 0, 4 or 8 bytes */ u8 peer_cookie[8]; /* peer's cookie */ int peer_cookie_len; /* 0, 4 or 8 bytes */ - int reorder_timeout; /* configured reorder timeout - * (in jiffies) */ + int reorder_timeout; /* configured reorder timeout (in jiffies) */ char *ifname; }; struct l2tp_session { - int magic; /* should be - * L2TP_SESSION_MAGIC */ + int magic; /* should be L2TP_SESSION_MAGIC */ long dead; - struct l2tp_tunnel *tunnel; /* back pointer to tunnel - * context */ + struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */ u32 session_id; u32 peer_session_id; u8 cookie[8]; @@ -89,42 +82,37 @@ struct l2tp_session { u32 nr_max; /* max NR. Depends on tunnel */ u32 nr_window_size; /* NR window size */ u32 nr_oos; /* NR of last OOS packet */ - int nr_oos_count; /* For OOS recovery */ + int nr_oos_count; /* for OOS recovery */ int nr_oos_count_max; - struct hlist_node hlist; /* Hash list node */ + struct hlist_node hlist; /* hash list node */ refcount_t ref_count; char name[32]; /* for logging */ char ifname[IFNAMSIZ]; - unsigned int recv_seq:1; /* expect receive packets with - * sequence numbers? */ - unsigned int send_seq:1; /* send packets with sequence - * numbers? */ - unsigned int lns_mode:1; /* behave as LNS? LAC enables - * sequence numbers under - * control of LNS. */ - int debug; /* bitmask of debug message - * categories */ - int reorder_timeout; /* configured reorder timeout - * (in jiffies) */ + unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ + unsigned int send_seq:1; /* send packets with sequence numbers? */ + unsigned int lns_mode:1; /* behave as LNS? + * LAC enables sequence numbers under LNS control. + */ + int debug; /* bitmask of debug message categories */ + int reorder_timeout; /* configured reorder timeout (in jiffies) */ int reorder_skip; /* set if skip to next nr */ enum l2tp_pwtype pwtype; struct l2tp_stats stats; - struct hlist_node global_hlist; /* Global hash list node */ + struct hlist_node global_hlist; /* global hash list node */ int (*build_header)(struct l2tp_session *session, void *buf); void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); void (*show)(struct seq_file *m, void *priv); - u8 priv[]; /* private data */ + u8 priv[]; /* private data */ }; /* Describes the tunnel. It contains info to track all the associated * sessions so incoming packets can be sorted out */ struct l2tp_tunnel_cfg { - int debug; /* bitmask of debug message - * categories */ + int debug; /* bitmask of debug message categories */ enum l2tp_encap_type encap; /* Used only for kernel-created sockets */ @@ -148,31 +136,29 @@ struct l2tp_tunnel { struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ - bool acpt_newsess; /* Indicates whether this - * tunnel accepts new sessions. - * Protected by hlist_lock. + bool acpt_newsess; /* indicates whether this tunnel accepts + * new sessions. Protected by hlist_lock. */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; - /* hashed list of sessions, - * hashed by id */ + /* hashed list of sessions, hashed by id */ u32 tunnel_id; u32 peer_tunnel_id; int version; /* 2=>L2TPv2, 3=>L2TPv3 */ char name[20]; /* for logging */ - int debug; /* bitmask of debug message - * categories */ + int debug; /* bitmask of debug message categories */ enum l2tp_encap_type encap; struct l2tp_stats stats; - struct list_head list; /* Keep a list of all tunnels */ + struct list_head list; /* list node on per-namespace list of tunnels */ struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; void (*old_sk_destruct)(struct sock *); - struct sock *sock; /* Parent socket */ - int fd; /* Parent fd, if tunnel socket - * was created by userspace */ + struct sock *sock; /* parent socket */ + int fd; /* parent fd, if tunnel socket was created + * by userspace + */ struct work_struct del_work; }; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index f0301cb41ae0..93181133e155 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * L2TP subsystem debugfs +/* L2TP subsystem debugfs * * Copyright (c) 2010 Katalix Systems Ltd */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index a84627d7be27..7ed2b4eced94 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * L2TPv3 ethernet pseudowire driver +/* L2TPv3 ethernet pseudowire driver * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd */ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index e5b63eab887d..70f9fdaf6c86 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * L2TPv3 IP encapsulation support +/* L2TPv3 IP encapsulation support * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd */ diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d17b9fe1180f..ca7696147c7e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* - * L2TPv3 IP encapsulation support for IPv6 +/* L2TPv3 IP encapsulation support for IPv6 * * Copyright (c) 2012 Katalix Systems Ltd */ @@ -38,7 +37,8 @@ struct l2tp_ip6_sock { u32 peer_conn_id; /* ipv6_pinfo has to be the last member of l2tp_ip6_sock, see - inet6_sk_generic */ + * inet6_sk_generic + */ struct ipv6_pinfo inet6; }; @@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int err; /* Rough check on arithmetic overflow, - better check is made in ip6_append_data(). + * better check is made in ip6_append_data(). */ if (len > INT_MAX) return -EMSGSIZE; @@ -528,9 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - /* - * Get and verify the address. - */ + /* Get and verify the address */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; @@ -555,8 +553,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - /* - * Otherwise it will be difficult to maintain + /* Otherwise it will be difficult to maintain * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ce8e94ace78..3120f8dcc56a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* - * L2TP netlink layer, for management +/* L2TP netlink layer, for management * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd * diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6dccffa29d02..48fbaf5ee82c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -117,8 +117,7 @@ struct pppol2tp_session { int owner; /* pid that opened the socket */ struct mutex sk_lock; /* Protects .sk */ - struct sock __rcu *sk; /* Pointer to the session - * PPPoX socket */ + struct sock __rcu *sk; /* Pointer to the session PPPoX socket */ struct sock *__sk; /* Copy of .sk, for cleanup */ struct rcu_head rcu; /* For asynchronous release */ }; -- cgit From 9f7da9a0e3dc43fa6046fd29cc4531b86af65446 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:07 +0100 Subject: l2tp: cleanup difficult-to-read line breaks Some l2tp code had line breaks which made the code more difficult to read. These were originally motivated by the 80-character line width coding guidelines, but were actually a negative from the perspective of trying to follow the code. Remove these linebreaks for clearer code, even if we do exceed 80 characters in width in some places. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 69 +++++++++++++++++++++---------------------------- net/l2tp/l2tp_ppp.c | 6 ++--- 2 files changed, 31 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3120f8dcc56a..42b409bc0de7 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -165,71 +165,63 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info struct l2tp_tunnel_cfg cfg = { 0, }; struct l2tp_tunnel *tunnel; struct net *net = genl_info_net(info); + struct nlattr **attrs = info->attrs; - if (!info->attrs[L2TP_ATTR_CONN_ID]) { + if (!attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; goto out; } - tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); + tunnel_id = nla_get_u32(attrs[L2TP_ATTR_CONN_ID]); - if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) { + if (!attrs[L2TP_ATTR_PEER_CONN_ID]) { ret = -EINVAL; goto out; } - peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]); + peer_tunnel_id = nla_get_u32(attrs[L2TP_ATTR_PEER_CONN_ID]); - if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) { + if (!attrs[L2TP_ATTR_PROTO_VERSION]) { ret = -EINVAL; goto out; } - proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]); + proto_version = nla_get_u8(attrs[L2TP_ATTR_PROTO_VERSION]); - if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) { + if (!attrs[L2TP_ATTR_ENCAP_TYPE]) { ret = -EINVAL; goto out; } - cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]); + cfg.encap = nla_get_u16(attrs[L2TP_ATTR_ENCAP_TYPE]); fd = -1; - if (info->attrs[L2TP_ATTR_FD]) { - fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]); + if (attrs[L2TP_ATTR_FD]) { + fd = nla_get_u32(attrs[L2TP_ATTR_FD]); } else { #if IS_ENABLED(CONFIG_IPV6) - if (info->attrs[L2TP_ATTR_IP6_SADDR] && - info->attrs[L2TP_ATTR_IP6_DADDR]) { - cfg.local_ip6 = nla_data( - info->attrs[L2TP_ATTR_IP6_SADDR]); - cfg.peer_ip6 = nla_data( - info->attrs[L2TP_ATTR_IP6_DADDR]); + if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) { + cfg.local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]); + cfg.peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]); } else #endif - if (info->attrs[L2TP_ATTR_IP_SADDR] && - info->attrs[L2TP_ATTR_IP_DADDR]) { - cfg.local_ip.s_addr = nla_get_in_addr( - info->attrs[L2TP_ATTR_IP_SADDR]); - cfg.peer_ip.s_addr = nla_get_in_addr( - info->attrs[L2TP_ATTR_IP_DADDR]); + if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) { + cfg.local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]); + cfg.peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]); } else { ret = -EINVAL; goto out; } - if (info->attrs[L2TP_ATTR_UDP_SPORT]) - cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]); - if (info->attrs[L2TP_ATTR_UDP_DPORT]) - cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]); - cfg.use_udp_checksums = nla_get_flag( - info->attrs[L2TP_ATTR_UDP_CSUM]); + if (attrs[L2TP_ATTR_UDP_SPORT]) + cfg.local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]); + if (attrs[L2TP_ATTR_UDP_DPORT]) + cfg.peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]); + cfg.use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]); #if IS_ENABLED(CONFIG_IPV6) - cfg.udp6_zero_tx_checksums = nla_get_flag( - info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); - cfg.udp6_zero_rx_checksums = nla_get_flag( - info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); + cfg.udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + cfg.udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); #endif } - if (info->attrs[L2TP_ATTR_DEBUG]) - cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + if (attrs[L2TP_ATTR_DEBUG]) + cfg.debug = nla_get_u32(attrs[L2TP_ATTR_DEBUG]); ret = -EINVAL; switch (cfg.encap) { @@ -715,8 +707,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) || nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) || nla_put_u32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id) || - nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, - session->peer_session_id) || + nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id) || nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) || nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype)) goto nla_put_failure; @@ -724,11 +715,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || (session->cookie_len && - nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, - &session->cookie[0])) || + nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, session->cookie)) || (session->peer_cookie_len && - nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, - &session->peer_cookie[0])) || + nla_put(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, session->peer_cookie)) || nla_put_u8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq) || nla_put_u8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq) || nla_put_u8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode) || diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 48fbaf5ee82c..3fed922addb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1566,8 +1566,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) user_data_ok = 'N'; } - seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " - "%04X/%04X %d %c\n", + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> %04X/%04X %d %c\n", session->name, ip, port, tunnel->tunnel_id, session->session_id, @@ -1606,8 +1605,7 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n"); seq_puts(m, "TUNNEL name, user-data-ok session-count\n"); seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); - seq_puts(m, " SESSION name, addr/port src-tid/sid " - "dest-tid/sid state user-data-ok\n"); + seq_puts(m, " SESSION name, addr/port src-tid/sid dest-tid/sid state user-data-ok\n"); seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n"); seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n"); goto out; -- cgit From 8ce9825a5993a935631a84b5cce9f65af61d4e59 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:08 +0100 Subject: l2tp: cleanup wonky alignment of line-broken function calls Arguments should be aligned with the function call open parenthesis as per checkpatch. Tweak some function calls which were not aligned correctly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 12 ++++++------ net/l2tp/l2tp_debugfs.c | 2 +- net/l2tp/l2tp_ppp.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f5e314d8a02b..3162e395cd4a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1285,10 +1285,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work) * exit hook. */ static int l2tp_tunnel_sock_create(struct net *net, - u32 tunnel_id, - u32 peer_tunnel_id, - struct l2tp_tunnel_cfg *cfg, - struct socket **sockp) + u32 tunnel_id, + u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, + struct socket **sockp) { int err = -EINVAL; struct socket *sock = NULL; @@ -1333,7 +1333,7 @@ static int l2tp_tunnel_sock_create(struct net *net, struct sockaddr_l2tpip6 ip6_addr = {0}; err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, - IPPROTO_L2TP, &sock); + IPPROTO_L2TP, &sock); if (err < 0) goto out; @@ -1361,7 +1361,7 @@ static int l2tp_tunnel_sock_create(struct net *net, struct sockaddr_l2tpip ip_addr = {0}; err = sock_create_kern(net, AF_INET, SOCK_DGRAM, - IPPROTO_L2TP, &sock); + IPPROTO_L2TP, &sock); if (err < 0) goto out; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 93181133e155..bea89c383b7d 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -145,7 +145,7 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) const struct ipv6_pinfo *np = inet6_sk(tunnel->sock); seq_printf(m, " from %pI6c to %pI6c\n", - &np->saddr, &tunnel->sock->sk_v6_daddr); + &np->saddr, &tunnel->sock->sk_v6_daddr); } else #endif seq_printf(m, " from %pI4 to %pI4\n", diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 3fed922addb5..f894dc275393 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1638,7 +1638,7 @@ static __net_init int pppol2tp_init_net(struct net *net) int err = 0; pde = proc_create_net("pppol2tp", 0444, net->proc_net, - &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data)); + &pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data)); if (!pde) { err = -ENOMEM; goto out; -- cgit From 0864e331fd53fec3933b99beeb7b1136b14efc63 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:09 +0100 Subject: l2tp: cleanup suspect code indent l2tp_core has conditionally compiled code in l2tp_xmit_skb for IPv6 support. The structure of this code triggered a checkpatch warning due to incorrect indentation. Fix up the indentation to address the checkpatch warning. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3162e395cd4a..64d3a1d3ff3c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1122,8 +1122,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len &sk->sk_v6_daddr, udp_len); else #endif - udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, - inet->inet_daddr, udp_len); + udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, + inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: -- cgit From bef04d162c52bd7e0b98ba5ee3f904e6556c6551 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:10 +0100 Subject: l2tp: add identifier name in function pointer prototype Reported by checkpatch: "WARNING: function definition argument 'struct sock *' should also have an identifier name" Add an identifier name to help document the prototype. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3ebb701eebbf..f23b3ff7ffff 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -154,7 +154,7 @@ struct l2tp_tunnel { struct net *l2tp_net; /* the net we belong to */ refcount_t ref_count; - void (*old_sk_destruct)(struct sock *); + void (*old_sk_destruct)(struct sock *sk); struct sock *sock; /* parent socket */ int fd; /* parent fd, if tunnel socket was created * by userspace -- cgit From dbf82f3fac9d7cde572b08bde94d8aa117f92ac9 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:11 +0100 Subject: l2tp: prefer using BIT macro Use BIT(x) rather than (1< Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index f23b3ff7ffff..2d2dd219a176 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -21,11 +21,11 @@ /* Per tunnel, session hash table size */ #define L2TP_HASH_BITS 4 -#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS) +#define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS) /* System-wide, session hash table size */ #define L2TP_HASH_BITS_2 8 -#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2) +#define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2) struct sk_buff; -- cgit From bdf9866e4b1be12a7af7eaf11f6dcf1b92f7073f Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:12 +0100 Subject: l2tp: prefer seq_puts for unformatted output checkpatch warns about use of seq_printf where seq_puts would do. Modify l2tp_debugfs accordingly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index bea89c383b7d..ebe03bbb5948 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -199,7 +199,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "%02x%02x%02x%02x", session->cookie[4], session->cookie[5], session->cookie[6], session->cookie[7]); - seq_printf(m, "\n"); + seq_puts(m, "\n"); } if (session->peer_cookie_len) { seq_printf(m, " peer cookie %02x%02x%02x%02x", @@ -209,7 +209,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "%02x%02x%02x%02x", session->peer_cookie[4], session->peer_cookie[5], session->peer_cookie[6], session->peer_cookie[7]); - seq_printf(m, "\n"); + seq_puts(m, "\n"); } seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", -- cgit From c0235fb39b0cdb8e33f1a68a031a94544487a56d Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:13 +0100 Subject: l2tp: line-break long function prototypes In l2tp_core.c both l2tp_tunnel_create and l2tp_session_create take quite a number of arguments and have a correspondingly long prototype. This is both quite difficult to scan visually, and triggers checkpatch warnings. Add a line break to make these function prototypes more readable. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 64d3a1d3ff3c..3dc712647f34 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1400,7 +1400,8 @@ out: static struct lock_class_key l2tp_socket_class; -int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, + struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; int err; @@ -1641,7 +1642,8 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) } EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); -struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, + u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; -- cgit From efcd8c8540f73829a26f5d224ebdd99382fd1552 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Wed, 22 Jul 2020 17:32:14 +0100 Subject: l2tp: avoid precidence issues in L2TP_SKB_CB macro checkpatch warned about the L2TP_SKB_CB macro's use of its argument: add braces to avoid the problem. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3dc712647f34..6871611d99f2 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -93,7 +93,7 @@ struct l2tp_skb_cb { unsigned long expires; }; -#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&skb->cb[sizeof(struct inet_skb_parm)]) +#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)]) static struct workqueue_struct *l2tp_wq; -- cgit From b0977bb268db1df6decd3405903ca500721cdc5f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:29 +0200 Subject: subflow: always init 'rel_write_seq' Currently we do not init the subflow write sequence for MP_JOIN subflows. This will cause bad mapping being generated as soon as we will use non backup subflow. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 1 - net/mptcp/subflow.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f0b0b503c262..59c0eef807b3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1814,7 +1814,6 @@ void mptcp_finish_connect(struct sock *ssk) ack_seq++; subflow->map_seq = ack_seq; subflow->map_subflow_seq = 1; - subflow->rel_write_seq = 1; /* the socket is not connected yet, no msk/subflow ops can access/race * accessing the field below diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 519122e66f17..84e70806b250 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -200,6 +200,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (subflow->conn_finished) return; + subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); -- cgit From 53eb4c383deb97b59f1755fe3035ec7992488375 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:30 +0200 Subject: mptcp: avoid data corruption on reinsert When updating a partially acked data fragment, we actually corrupt it. This is irrelevant till we send data on a single subflow, as retransmitted data, if any are discarded by the peer as duplicate, but it will cause data corruption as soon as we will start creating non backup subflows. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 59c0eef807b3..254e6ef2b4e0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -460,15 +460,20 @@ static void mptcp_clean_una(struct sock *sk) dfrag = mptcp_rtx_head(sk); if (dfrag && after64(snd_una, dfrag->data_seq)) { - u64 delta = dfrag->data_seq + dfrag->data_len - snd_una; + u64 delta = snd_una - dfrag->data_seq; + + if (WARN_ON_ONCE(delta > dfrag->data_len)) + goto out; dfrag->data_seq += delta; + dfrag->offset += delta; dfrag->data_len -= delta; dfrag_uncharge(sk, delta); cleaned = true; } +out: if (cleaned) { sk_mem_reclaim_partial(sk); -- cgit From 0235d075a592dfde575df81f150feb0d95a5ef5c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:31 +0200 Subject: mptcp: mark as fallback even early ones In the unlikely event of a failure at connect time, we currently clear the request_mptcp flag - so that the MPC handshake is not started at all, but the msk is not explicitly marked as fallback. This would lead to later insertion of wrong DSS options in the xmitted packets, in violation of RFC specs and possibly fooling the peer. Fixes: e1ff9e82e2ea ("net: mptcp: improve fallback to TCP") Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 254e6ef2b4e0..2936413171be 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1944,6 +1944,13 @@ unlock: return err; } +static void mptcp_subflow_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + subflow->request_mptcp = 0; + __mptcp_do_fallback(msk); +} + static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { @@ -1975,10 +1982,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, * TCP option space. */ if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) - subflow->request_mptcp = 0; + mptcp_subflow_early_fallback(msk, subflow); #endif if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) - subflow->request_mptcp = 0; + mptcp_subflow_early_fallback(msk, subflow); do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); -- cgit From b93df08ccda326ef89a6e80fb796588b9a30a980 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:32 +0200 Subject: mptcp: explicitly track the fully established status Currently accepted msk sockets become established only after accept() returns the new sk to user-space. As MP_JOIN request are refused as per RFC spec on non fully established socket, the above causes mp_join self-tests instabilities. This change lets the msk entering the established status as soon as it receives the 3rd ack and propagates the first subflow fully established status on the msk socket. Finally we can change the subflow acceptance condition to take in account both the sock state and the msk fully established flag. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/options.c | 5 ++--- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 8 ++++++++ net/mptcp/subflow.c | 23 +++++++++++++++++++---- 4 files changed, 31 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 19707c07efc1..3bc56eb608d8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -709,6 +709,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, * additional ack. */ subflow->fully_established = 1; + WRITE_ONCE(msk->fully_established, true); goto fully_established; } @@ -724,9 +725,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); - subflow->fully_established = 1; - subflow->remote_key = mp_opt->sndr_key; - subflow->can_ack = 1; + mptcp_subflow_fully_established(subflow, mp_opt); fully_established: if (likely(subflow->pm_notified)) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2936413171be..979dfcd2aa14 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1522,6 +1522,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; msk->subflow = NULL; + WRITE_ONCE(msk->fully_established, false); msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); @@ -1605,7 +1606,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); - inet_sk_state_store(newsk, TCP_ESTABLISHED); mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(new_mptcp_sock); @@ -1855,7 +1855,7 @@ bool mptcp_finish_join(struct sock *sk) pr_debug("msk=%p, subflow=%p", msk, subflow); /* mptcp socket already closing? */ - if (inet_sk_state_load(parent) != TCP_ESTABLISHED) + if (!mptcp_is_fully_established(parent)) return false; if (!msk->pm.server_side) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6e114c09e5b4..67634b595466 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -198,6 +198,7 @@ struct mptcp_sock { u32 token; unsigned long flags; bool can_ack; + bool fully_established; spinlock_t join_list_lock; struct work_struct work; struct list_head conn_list; @@ -342,6 +343,8 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow) } int mptcp_is_enabled(struct net *net); +void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, + struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); @@ -373,6 +376,11 @@ void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); +static inline bool mptcp_is_fully_established(struct sock *sk) +{ + return inet_sk_state_load(sk) == TCP_ESTABLISHED && + READ_ONCE(mptcp_sk(sk)->fully_established); +} void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 84e70806b250..ea81842fc3b2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -387,6 +387,17 @@ static void subflow_drop_ctx(struct sock *ssk) kfree_rcu(ctx, rcu); } +void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, + struct mptcp_options_received *mp_opt) +{ + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + subflow->remote_key = mp_opt->sndr_key; + subflow->fully_established = 1; + subflow->can_ack = 1; + WRITE_ONCE(msk->fully_established, true); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -466,6 +477,11 @@ create_child: } if (ctx->mp_capable) { + /* this can't race with mptcp_close(), as the msk is + * not yet exposted to user-space + */ + inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED); + /* new mpc subflow takes ownership of the newly * created mptcp socket */ @@ -478,9 +494,8 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - ctx->remote_key = mp_opt.sndr_key; - ctx->fully_established = mp_opt.mp_capable; - ctx->can_ack = mp_opt.mp_capable; + if (mp_opt.mp_capable) + mptcp_subflow_fully_established(ctx, &mp_opt); } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -967,7 +982,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex, int addrlen; int err; - if (sk->sk_state != TCP_ESTABLISHED) + if (!mptcp_is_fully_established(sk)) return -ENOTCONN; err = mptcp_subflow_create_socket(sk, &sf); -- cgit From fa25e815d963115eb06036a8f6a50e724bc259e2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:33 +0200 Subject: mptcp: cleanup subflow_finish_connect() The mentioned function has several unneeded branches, handle each case - MP_CAPABLE, MP_JOIN, fallback - under a single conditional and drop quite a bit of duplicate code. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 56 ++++++++++++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ea81842fc3b2..7f3ef1840df5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -206,44 +206,34 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); - if (subflow->request_mptcp && mp_opt.mp_capable) { + if (subflow->request_mptcp) { + if (!mp_opt.mp_capable) { + MPTCP_INC_STATS(sock_net(sk), + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); + mptcp_do_fallback(sk); + pr_fallback(mptcp_sk(subflow->conn)); + goto fallback; + } + subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; pr_debug("subflow=%p, remote_key=%llu", subflow, subflow->remote_key); - } else if (subflow->request_join && mp_opt.mp_join) { - subflow->mp_join = 1; + mptcp_finish_connect(sk); + } else if (subflow->request_join) { + u8 hmac[SHA256_DIGEST_SIZE]; + + if (!mp_opt.mp_join) + goto do_reset; + subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); - } else { - if (subflow->request_mptcp) - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - mptcp_do_fallback(sk); - pr_fallback(mptcp_sk(subflow->conn)); - } - if (mptcp_check_fallback(sk)) { - mptcp_rcv_space_init(mptcp_sk(parent), sk); - return; - } - - if (subflow->mp_capable) { - pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), - subflow->remote_key); - mptcp_finish_connect(sk); - } else if (subflow->mp_join) { - u8 hmac[SHA256_DIGEST_SIZE]; - - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", - subflow, subflow->thmac, - subflow->remote_nonce); if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); - subflow->mp_join = 0; goto do_reset; } @@ -251,18 +241,22 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->local_nonce, subflow->remote_nonce, hmac); - memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); if (!mptcp_finish_join(sk)) goto do_reset; + subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); - } else { -do_reset: - tcp_send_active_reset(sk, GFP_ATOMIC); - tcp_done(sk); + } else if (mptcp_check_fallback(sk)) { +fallback: + mptcp_rcv_space_init(mptcp_sk(parent), sk); } + return; + +do_reset: + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); } static struct request_sock_ops subflow_request_sock_ops; -- cgit From b7514694ed2952684a1e4fc44d83682140fd8cef Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:34 +0200 Subject: subflow: explicitly check for plain tcp rsk When syncookie are in use, the TCP stack may feed into subflow_syn_recv_sock() plain TCP request sockets. We can't access mptcp_subflow_request_sock-specific fields on such sockets. Explicitly check the rsk ops to do safe accesses. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 7f3ef1840df5..3ef445f59556 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -415,7 +415,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* hopefully temporary handling for MP_JOIN+syncookie */ subflow_req = mptcp_subflow_rsk(req); - fallback_is_fatal = subflow_req->mp_join; + fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join; fallback = !tcp_rsk(req)->is_mptcp; if (fallback) goto create_child; -- cgit From 97e617518cbc318113b034a5fb33f49c81701278 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:35 +0200 Subject: subflow: use rsk_ops->send_reset() tcp_send_active_reset() is more prone to transient errors (memory allocation or xmit queue full): in stress conditions the kernel may drop the egress packet, and the client will be stuck. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3ef445f59556..ada04df6f99f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -524,9 +524,9 @@ out: dispose_child: subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; - tcp_send_active_reset(child, GFP_ATOMIC); inet_csk_prepare_for_destroy_sock(child); tcp_done(child); + req->rsk_ops->send_reset(sk, skb); /* The last child reference will be released by the caller */ return child; -- cgit From 4cf8b7e48a09745145881b311fe6a9154ba69ebc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jul 2020 13:02:36 +0200 Subject: subflow: introduce and use mptcp_can_accept_new_subflow() So that we can easily perform some basic PM-related adimission checks before creating the child socket. Reviewed-by: Mat Martineau Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ada04df6f99f..e645483d1200 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -53,6 +53,12 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); } +static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) +{ + return mptcp_is_fully_established((void *)msk) && + READ_ONCE(msk->pm.accept_subflow); +} + /* validate received token and create truncated hmac and nonce for SYN-ACK */ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, const struct sk_buff *skb) @@ -443,6 +449,7 @@ create_msk: } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_join || + !mptcp_can_accept_new_subflow(subflow_req->msk) || !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); fallback = true; -- cgit From 49b0aa1b6585705de4a08971602cd2726d9027f0 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 23 Jul 2020 19:13:43 +0800 Subject: net/ncsi: use eth_zero_addr() to clear mac address Use eth_zero_addr() to clear mac address insetad of memset(). Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/ncsi/ncsi-rsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index a94bb59793f0..5b1f4ec66dd9 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -471,7 +471,7 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) memcpy(&ncf->addrs[index], cmd->mac, ETH_ALEN); } else { clear_bit(cmd->index - 1, bitmap); - memset(&ncf->addrs[index], 0, ETH_ALEN); + eth_zero_addr(&ncf->addrs[index]); } spin_unlock_irqrestore(&nc->lock, flags); -- cgit From 0febc7b3cd17bd2323a036356c2ae6514acf9010 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:50 +0100 Subject: l2tp: cleanup comparisons to NULL checkpatch warns about comparisons to NULL, e.g. CHECK: Comparison to NULL could be written "!rt" #474: FILE: net/l2tp/l2tp_ip.c:474: + if (rt == NULL) { These sort of comparisons are generally clearer and more readable the way checkpatch suggests, so update l2tp accordingly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 20 ++++++++++---------- net/l2tp/l2tp_debugfs.c | 12 ++++++------ net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/l2tp/l2tp_netlink.c | 23 +++++++++++------------ net/l2tp/l2tp_ppp.c | 36 ++++++++++++++++++------------------ 6 files changed, 47 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6871611d99f2..2f3e6b3a7d8e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -412,7 +412,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * } /* call private receive handler */ - if (session->recv_skb != NULL) + if (session->recv_skb) (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); else kfree_skb(skb); @@ -911,7 +911,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct l2tp_tunnel *tunnel; tunnel = rcu_dereference_sk_user_data(sk); - if (tunnel == NULL) + if (!tunnel) goto pass_up; l2tp_dbg(tunnel, L2TP_MSG_DATA, "%s: received %d bytes\n", @@ -1150,7 +1150,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) { struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); - if (tunnel == NULL) + if (!tunnel) goto end; l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); @@ -1189,7 +1189,7 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) struct hlist_node *tmp; struct l2tp_session *session; - BUG_ON(tunnel == NULL); + BUG_ON(!tunnel); l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n", tunnel->name); @@ -1214,7 +1214,7 @@ again: __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) + if (session->session_close) (*session->session_close)(session); l2tp_session_dec_refcount(session); @@ -1407,11 +1407,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 int err; enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; - if (cfg != NULL) + if (cfg) encap = cfg->encap; tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); - if (tunnel == NULL) { + if (!tunnel) { err = -ENOMEM; goto err; } @@ -1426,7 +1426,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 rwlock_init(&tunnel->hlist_lock); tunnel->acpt_newsess = true; - if (cfg != NULL) + if (cfg) tunnel->debug = cfg->debug; tunnel->encap = encap; @@ -1615,7 +1615,7 @@ int l2tp_session_delete(struct l2tp_session *session) __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) + if (session->session_close) (*session->session_close)(session); l2tp_session_dec_refcount(session); @@ -1648,7 +1648,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn struct l2tp_session *session; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); - if (session != NULL) { + if (session) { session->magic = L2TP_SESSION_MAGIC; session->tunnel = tunnel; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index ebe03bbb5948..117a6697da72 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -58,7 +58,7 @@ static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; - if (pd->session == NULL) { + if (!pd->session) { pd->session_idx = 0; l2tp_dfs_next_tunnel(pd); } @@ -72,16 +72,16 @@ static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs) if (!pos) goto out; - BUG_ON(m->private == NULL); + BUG_ON(!m->private); pd = m->private; - if (pd->tunnel == NULL) + if (!pd->tunnel) l2tp_dfs_next_tunnel(pd); else l2tp_dfs_next_session(pd); /* NULL tunnel and session indicates end of list */ - if ((pd->tunnel == NULL) && (pd->session == NULL)) + if (!pd->tunnel && !pd->session) pd = NULL; out: @@ -221,7 +221,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) atomic_long_read(&session->stats.rx_bytes), atomic_long_read(&session->stats.rx_errors)); - if (session->show != NULL) + if (session->show) session->show(m, session); } @@ -268,7 +268,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file) int rc = -ENOMEM; pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (pd == NULL) + if (!pd) goto out; /* Derive the network namespace from the pid opening the diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 70f9fdaf6c86..d81564cf1e7f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -471,7 +471,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = (struct rtable *)__sk_dst_check(sk, 0); rcu_read_lock(); - if (rt == NULL) { + if (!rt) { const struct ip_options_rcu *inet_opt; inet_opt = rcu_dereference(inet->inet_opt); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ca7696147c7e..614febf8dd0d 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,7 +486,7 @@ static int l2tp_ip6_push_pending_frames(struct sock *sk) int err = 0; skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) + if (!skb) goto out; transhdr = (__be32 *)skb_transport_header(skb); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 42b409bc0de7..fc26ebad2f4f 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -333,7 +333,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla goto nla_put_failure; nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); - if (nest == NULL) + if (!nest) goto nla_put_failure; if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, @@ -475,7 +475,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback for (;;) { tunnel = l2tp_tunnel_get_nth(net, ti); - if (tunnel == NULL) + if (!tunnel) goto out; if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, @@ -598,14 +598,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]); #ifdef CONFIG_MODULES - if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) { + if (!l2tp_nl_cmd_ops[cfg.pw_type]) { genl_unlock(); request_module("net-l2tp-type-%u", cfg.pw_type); genl_lock(); } #endif - if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || - (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { + if (!l2tp_nl_cmd_ops[cfg.pw_type] || !l2tp_nl_cmd_ops[cfg.pw_type]->session_create) { ret = -EPROTONOSUPPORT; goto out_tunnel; } @@ -637,7 +636,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf u16 pw_type; session = l2tp_nl_session_get(info); - if (session == NULL) { + if (!session) { ret = -ENODEV; goto out; } @@ -662,7 +661,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; session = l2tp_nl_session_get(info); - if (session == NULL) { + if (!session) { ret = -ENODEV; goto out; } @@ -729,7 +728,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nest = nla_nest_start_noflag(skb, L2TP_ATTR_STATS); - if (nest == NULL) + if (!nest) goto nla_put_failure; if (nla_put_u64_64bit(skb, L2TP_ATTR_TX_PACKETS, @@ -774,7 +773,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) int ret; session = l2tp_nl_session_get(info); - if (session == NULL) { + if (!session) { ret = -ENODEV; goto err; } @@ -813,14 +812,14 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback int si = cb->args[1]; for (;;) { - if (tunnel == NULL) { + if (!tunnel) { tunnel = l2tp_tunnel_get_nth(net, ti); - if (tunnel == NULL) + if (!tunnel) goto out; } session = l2tp_session_get_nth(tunnel, si); - if (session == NULL) { + if (!session) { ti++; l2tp_tunnel_dec_refcount(tunnel); tunnel = NULL; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f894dc275393..7404661d4117 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -154,12 +154,12 @@ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) { struct l2tp_session *session; - if (sk == NULL) + if (!sk) return NULL; sock_hold(sk); session = (struct l2tp_session *)(sk->sk_user_data); - if (session == NULL) { + if (!session) { sock_put(sk); goto out; } @@ -217,7 +217,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int */ rcu_read_lock(); sk = rcu_dereference(ps->sk); - if (sk == NULL) + if (!sk) goto no_sock; /* If the first two bytes are 0xFF03, consider that it is the PPP's @@ -285,7 +285,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, /* Get session and tunnel contexts */ error = -EBADF; session = pppol2tp_sock_to_session(sk); - if (session == NULL) + if (!session) goto error; tunnel = session->tunnel; @@ -360,7 +360,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get session and tunnel contexts from the socket */ session = pppol2tp_sock_to_session(sk); - if (session == NULL) + if (!session) goto abort; tunnel = session->tunnel; @@ -703,7 +703,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, * tunnel id. */ if (!info.session_id && !info.peer_session_id) { - if (tunnel == NULL) { + if (!tunnel) { struct l2tp_tunnel_cfg tcfg = { .encap = L2TP_ENCAPTYPE_UDP, .debug = 0, @@ -738,11 +738,11 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, } else { /* Error if we can't find the tunnel */ error = -ENOENT; - if (tunnel == NULL) + if (!tunnel) goto end; /* Error if socket is not prepped */ - if (tunnel->sock == NULL) + if (!tunnel->sock) goto end; } @@ -911,14 +911,14 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, struct pppol2tp_session *pls; error = -ENOTCONN; - if (sk == NULL) + if (!sk) goto end; if (!(sk->sk_state & PPPOX_CONNECTED)) goto end; error = -EBADF; session = pppol2tp_sock_to_session(sk); - if (session == NULL) + if (!session) goto end; pls = l2tp_session_priv(session); @@ -1263,13 +1263,13 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = -ENOTCONN; - if (sk->sk_user_data == NULL) + if (!sk->sk_user_data) goto end; /* Get session context from the socket */ err = -EBADF; session = pppol2tp_sock_to_session(sk); - if (session == NULL) + if (!session) goto end; /* Special case: if session_id == 0x0000, treat as operation on tunnel @@ -1382,13 +1382,13 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; err = -ENOTCONN; - if (sk->sk_user_data == NULL) + if (!sk->sk_user_data) goto end; /* Get the session context */ err = -EBADF; session = pppol2tp_sock_to_session(sk); - if (session == NULL) + if (!session) goto end; /* Special case: if session_id == 0x0000, treat as operation on tunnel */ @@ -1464,7 +1464,7 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; - if (pd->session == NULL) { + if (!pd->session) { pd->session_idx = 0; pppol2tp_next_tunnel(net, pd); } @@ -1479,17 +1479,17 @@ static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs) if (!pos) goto out; - BUG_ON(m->private == NULL); + BUG_ON(!m->private); pd = m->private; net = seq_file_net(m); - if (pd->tunnel == NULL) + if (!pd->tunnel) pppol2tp_next_tunnel(net, pd); else pppol2tp_next_session(net, pd); /* NULL tunnel and session indicates end of list */ - if ((pd->tunnel == NULL) && (pd->session == NULL)) + if (!pd->tunnel && !pd->session) pd = NULL; out: -- cgit From 6c0ec37b82834630cfe99ef42690beef18d2b400 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:51 +0100 Subject: l2tp: cleanup unnecessary braces in if statements These checks are all simple and don't benefit from extra braces to clarify intent. Remove them for easier-reading code. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 6 +++--- net/l2tp/l2tp_ppp.c | 23 +++++++++-------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 2f3e6b3a7d8e..d1403f27135e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -683,7 +683,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * check if we sre sending sequence numbers and if not, * configure it so. */ - if ((!session->lns_mode) && (!session->send_seq)) { + if (!session->lns_mode && !session->send_seq) { l2tp_info(session, L2TP_MSG_SEQ, "%s: requested to enable seq numbers by LNS\n", session->name); @@ -707,7 +707,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * If we're the LNS and we're sending sequence numbers, the * LAC is broken. Discard the frame. */ - if ((!session->lns_mode) && (session->send_seq)) { + if (!session->lns_mode && session->send_seq) { l2tp_info(session, L2TP_MSG_SEQ, "%s: requested to disable seq numbers by LNS\n", session->name); @@ -1389,7 +1389,7 @@ static int l2tp_tunnel_sock_create(struct net *net, out: *sockp = sock; - if ((err < 0) && sock) { + if (err < 0 && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); *sockp = NULL; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7404661d4117..e58fe7e3b884 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -802,8 +802,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, * the internal context for use by ioctl() and sockopt() * handlers. */ - if ((session->session_id == 0) && - (session->peer_session_id == 0)) { + if (session->session_id == 0 && session->peer_session_id == 0) { error = 0; goto out_no_ppp; } @@ -925,7 +924,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, tunnel = session->tunnel; inet = inet_sk(tunnel->sock); - if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) { + if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET) { struct sockaddr_pppol2tp sp; len = sizeof(sp); @@ -943,8 +942,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr; memcpy(uaddr, &sp, len); #if IS_ENABLED(CONFIG_IPV6) - } else if ((tunnel->version == 2) && - (tunnel->sock->sk_family == AF_INET6)) { + } else if (tunnel->version == 2 && tunnel->sock->sk_family == AF_INET6) { struct sockaddr_pppol2tpin6 sp; len = sizeof(sp); @@ -962,8 +960,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(&sp.pppol2tp.addr.sin6_addr, &tunnel->sock->sk_v6_daddr, sizeof(tunnel->sock->sk_v6_daddr)); memcpy(uaddr, &sp, len); - } else if ((tunnel->version == 3) && - (tunnel->sock->sk_family == AF_INET6)) { + } else if (tunnel->version == 3 && tunnel->sock->sk_family == AF_INET6) { struct sockaddr_pppol2tpv3in6 sp; len = sizeof(sp); @@ -1179,7 +1176,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, switch (optname) { case PPPOL2TP_SO_RECVSEQ: - if ((val != 0) && (val != 1)) { + if (val != 0 && val != 1) { err = -EINVAL; break; } @@ -1190,7 +1187,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, break; case PPPOL2TP_SO_SENDSEQ: - if ((val != 0) && (val != 1)) { + if (val != 0 && val != 1) { err = -EINVAL; break; } @@ -1208,7 +1205,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, break; case PPPOL2TP_SO_LNSMODE: - if ((val != 0) && (val != 1)) { + if (val != 0 && val != 1) { err = -EINVAL; break; } @@ -1274,8 +1271,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - if ((session->session_id == 0) && - (session->peer_session_id == 0)) { + if (session->session_id == 0 && session->peer_session_id == 0) { tunnel = session->tunnel; err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); } else { @@ -1392,8 +1388,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, goto end; /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - if ((session->session_id == 0) && - (session->peer_session_id == 0)) { + if (session->session_id == 0 && session->peer_session_id == 0) { tunnel = session->tunnel; err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); if (err) -- cgit From 26d9a2710616c4499b661cada35ac5e4b125ebe8 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:52 +0100 Subject: l2tp: check socket address type in l2tp_dfs_seq_tunnel_show checkpatch warns about indentation and brace balancing around the conditionally compiled code for AF_INET6 support in l2tp_dfs_seq_tunnel_show. By adding another check on the socket address type we can make the code more readable while removing the checkpatch warning. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 117a6697da72..72ba83aa0eaf 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,10 +146,12 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) seq_printf(m, " from %pI6c to %pI6c\n", &np->saddr, &tunnel->sock->sk_v6_daddr); - } else + } #endif - seq_printf(m, " from %pI4 to %pI4\n", - &inet->inet_saddr, &inet->inet_daddr); + if (tunnel->sock->sk_family == AF_INET) + seq_printf(m, " from %pI4 to %pI4\n", + &inet->inet_saddr, &inet->inet_daddr); + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) seq_printf(m, " source port %hu, dest port %hu\n", ntohs(inet->inet_sport), ntohs(inet->inet_dport)); -- cgit From 584ca31f469dece9a83cdce33953ba23b115945c Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:53 +0100 Subject: l2tp: cleanup netlink send of tunnel address information l2tp_nl_tunnel_send has conditionally compiled code to support AF_INET6, which makes the code difficult to follow and triggers checkpatch warnings. Split the code out into functions to handle the AF_INET v.s. AF_INET6 cases, which both improves readability and resolves the checkpatch warnings. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 126 +++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index fc26ebad2f4f..0021cc03417e 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -310,16 +310,79 @@ out: return ret; } +#if IS_ENABLED(CONFIG_IPV6) +static int l2tp_nl_tunnel_send_addr6(struct sk_buff *skb, struct sock *sk, + enum l2tp_encap_type encap) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + + switch (encap) { + case L2TP_ENCAPTYPE_UDP: + if (udp_get_no_check6_tx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) + return -1; + if (udp_get_no_check6_rx(sk) && + nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) + return -1; + if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || + nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) + return -1; + fallthrough; + case L2TP_ENCAPTYPE_IP: + if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, &np->saddr) || + nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, &sk->sk_v6_daddr)) + return -1; + break; + } + return 0; +} +#endif + +static int l2tp_nl_tunnel_send_addr4(struct sk_buff *skb, struct sock *sk, + enum l2tp_encap_type encap) +{ + struct inet_sock *inet = inet_sk(sk); + + switch (encap) { + case L2TP_ENCAPTYPE_UDP: + if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx) || + nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || + nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) + return -1; + fallthrough; + case L2TP_ENCAPTYPE_IP: + if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr) || + nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr)) + return -1; + break; + } + + return 0; +} + +/* Append attributes for the tunnel address, handling the different attribute types + * used for different tunnel encapsulation and AF_INET v.s. AF_INET6. + */ +static int l2tp_nl_tunnel_send_addr(struct sk_buff *skb, struct l2tp_tunnel *tunnel) +{ + struct sock *sk = tunnel->sock; + + if (!sk) + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return l2tp_nl_tunnel_send_addr6(skb, sk, tunnel->encap); +#endif + return l2tp_nl_tunnel_send_addr4(skb, sk, tunnel->encap); +} + static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; - struct sock *sk = NULL; - struct inet_sock *inet; -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6_pinfo *np = NULL; -#endif hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) @@ -363,58 +426,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla goto nla_put_failure; nla_nest_end(skb, nest); - sk = tunnel->sock; - if (!sk) - goto out; - -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) - np = inet6_sk(sk); -#endif - - inet = inet_sk(sk); - - switch (tunnel->encap) { - case L2TP_ENCAPTYPE_UDP: - switch (sk->sk_family) { - case AF_INET: - if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx)) - goto nla_put_failure; - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - if (udp_get_no_check6_tx(sk) && - nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX)) - goto nla_put_failure; - if (udp_get_no_check6_rx(sk) && - nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX)) - goto nla_put_failure; - break; -#endif - } - if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || - nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) - goto nla_put_failure; - /* fall through */ - case L2TP_ENCAPTYPE_IP: -#if IS_ENABLED(CONFIG_IPV6) - if (np) { - if (nla_put_in6_addr(skb, L2TP_ATTR_IP6_SADDR, - &np->saddr) || - nla_put_in6_addr(skb, L2TP_ATTR_IP6_DADDR, - &sk->sk_v6_daddr)) - goto nla_put_failure; - } else -#endif - if (nla_put_in_addr(skb, L2TP_ATTR_IP_SADDR, - inet->inet_saddr) || - nla_put_in_addr(skb, L2TP_ATTR_IP_DADDR, - inet->inet_daddr)) - goto nla_put_failure; - break; - } + if (l2tp_nl_tunnel_send_addr(skb, tunnel)) + goto nla_put_failure; -out: genlmsg_end(skb, hdr); return 0; -- cgit From 0787840dad4ce7875f1e85eb8b72a54e9d87f9db Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:54 +0100 Subject: l2tp: cleanup netlink tunnel create address handling When creating an L2TP tunnel using the netlink API, userspace must either pass a socket FD for the tunnel to use (for managed tunnels), or specify the tunnel source/destination address (for unmanaged tunnels). Since source/destination addresses may be AF_INET or AF_INET6, the l2tp netlink code has conditionally compiled blocks to support IPv6. Rather than embedding these directly into l2tp_nl_cmd_tunnel_create (where it makes the code difficult to read and confuses checkpatch to boot) split the handling of address-related attributes into a separate function. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 57 ++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0021cc03417e..35716a6e1e2c 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -155,12 +155,38 @@ static int l2tp_session_notify(struct genl_family *family, return ret; } +static int l2tp_nl_cmd_tunnel_create_get_addr(struct nlattr **attrs, struct l2tp_tunnel_cfg *cfg) +{ + if (attrs[L2TP_ATTR_UDP_SPORT]) + cfg->local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]); + if (attrs[L2TP_ATTR_UDP_DPORT]) + cfg->peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]); + cfg->use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]); + + /* Must have either AF_INET or AF_INET6 address for source and destination */ +#if IS_ENABLED(CONFIG_IPV6) + if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) { + cfg->local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]); + cfg->peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]); + cfg->udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); + cfg->udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); + return 0; + } +#endif + if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) { + cfg->local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]); + cfg->peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]); + return 0; + } + return -EINVAL; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; u32 peer_tunnel_id; int proto_version; - int fd; + int fd = -1; int ret = 0; struct l2tp_tunnel_cfg cfg = { 0, }; struct l2tp_tunnel *tunnel; @@ -191,33 +217,16 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info } cfg.encap = nla_get_u16(attrs[L2TP_ATTR_ENCAP_TYPE]); - fd = -1; + /* Managed tunnels take the tunnel socket from userspace. + * Unmanaged tunnels must call out the source and destination addresses + * for the kernel to create the tunnel socket itself. + */ if (attrs[L2TP_ATTR_FD]) { fd = nla_get_u32(attrs[L2TP_ATTR_FD]); } else { -#if IS_ENABLED(CONFIG_IPV6) - if (attrs[L2TP_ATTR_IP6_SADDR] && attrs[L2TP_ATTR_IP6_DADDR]) { - cfg.local_ip6 = nla_data(attrs[L2TP_ATTR_IP6_SADDR]); - cfg.peer_ip6 = nla_data(attrs[L2TP_ATTR_IP6_DADDR]); - } else -#endif - if (attrs[L2TP_ATTR_IP_SADDR] && attrs[L2TP_ATTR_IP_DADDR]) { - cfg.local_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_SADDR]); - cfg.peer_ip.s_addr = nla_get_in_addr(attrs[L2TP_ATTR_IP_DADDR]); - } else { - ret = -EINVAL; + ret = l2tp_nl_cmd_tunnel_create_get_addr(attrs, &cfg); + if (ret < 0) goto out; - } - if (attrs[L2TP_ATTR_UDP_SPORT]) - cfg.local_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_SPORT]); - if (attrs[L2TP_ATTR_UDP_DPORT]) - cfg.peer_udp_port = nla_get_u16(attrs[L2TP_ATTR_UDP_DPORT]); - cfg.use_udp_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_CSUM]); - -#if IS_ENABLED(CONFIG_IPV6) - cfg.udp6_zero_tx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]); - cfg.udp6_zero_rx_checksums = nla_get_flag(attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]); -#endif } if (attrs[L2TP_ATTR_DEBUG]) -- cgit From 70c05bfa4a3d36c0d8e7cef86b0f6c8a9cbb8881 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 23 Jul 2020 12:29:55 +0100 Subject: l2tp: cleanup kzalloc calls Passing "sizeof(struct blah)" in kzalloc calls is less readable, potentially prone to future bugs if the type of the pointer is changed, and triggers checkpatch warnings. Tweak the kzalloc calls in l2tp which use this form to avoid the warning. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d1403f27135e..7e3523015d6f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1410,7 +1410,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg) encap = cfg->encap; - tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL); + tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL); if (!tunnel) { err = -ENOMEM; goto err; @@ -1647,7 +1647,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn { struct l2tp_session *session; - session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); + session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL); if (session) { session->magic = L2TP_SESSION_MAGIC; session->tunnel = tunnel; -- cgit From 5df5661a1387e829c901d009cdd1fccc376cdb74 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 23 Jul 2020 01:43:12 +0300 Subject: net: dsa: stop overriding master's ndo_get_phys_port_name The purpose of this override is to give the user an indication of what the number of the CPU port is (in DSA, the CPU port is a hardware implementation detail and not a network interface capable of traffic). However, it has always failed (by design) at providing this information to the user in a reliable fashion. Prior to commit 3369afba1e46 ("net: Call into DSA netdevice_ops wrappers"), the behavior was to only override this callback if it was not provided by the DSA master. That was its first failure: if the DSA master itself was a DSA port or a switchdev, then the user would not see the number of the CPU port in /sys/class/net/eth0/phys_port_name, but the number of the DSA master port within its respective physical switch. But that was actually ok in a way. The commit mentioned above changed that behavior, and now overrides the master's ndo_get_phys_port_name unconditionally. That comes with problems of its own, which are worse in a way. The idea is that it's typical for switchdev users to have udev rules for consistent interface naming. These are based, among other things, on the phys_port_name attribute. If we let the DSA switch at the bottom to start randomly overriding ndo_get_phys_port_name with its own CPU port, we basically lose any predictability in interface naming, or even uniqueness, for that matter. So, there are reasons to let DSA override the master's callback (to provide a consistent interface, a number which has a clear meaning and must not be interpreted according to context), and there are reasons to not let DSA override it (it breaks udev matching for the DSA master). But, there is an alternative method for users to retrieve the number of the CPU port of each DSA switch in the system: $ devlink port pci/0000:00:00.5/0: type eth netdev swp0 flavour physical port 0 pci/0000:00:00.5/2: type eth netdev swp2 flavour physical port 2 pci/0000:00:00.5/4: type notset flavour cpu port 4 spi/spi2.0/0: type eth netdev sw0p0 flavour physical port 0 spi/spi2.0/1: type eth netdev sw0p1 flavour physical port 1 spi/spi2.0/2: type eth netdev sw0p2 flavour physical port 2 spi/spi2.0/4: type notset flavour cpu port 4 spi/spi2.1/0: type eth netdev sw1p0 flavour physical port 0 spi/spi2.1/1: type eth netdev sw1p1 flavour physical port 1 spi/spi2.1/2: type eth netdev sw1p2 flavour physical port 2 spi/spi2.1/3: type eth netdev sw1p3 flavour physical port 3 spi/spi2.1/4: type notset flavour cpu port 4 So remove this duplicated, unreliable and troublesome method. From this patch on, the phys_port_name attribute of the DSA master will only contain information about itself (if at all). If the users need reliable information about the CPU port they're probably using devlink anyway. Signed-off-by: Vladimir Oltean Acked-by: florian Fainelli Signed-off-by: David S. Miller --- net/core/dev.c | 5 ----- net/dsa/master.c | 12 ------------ 2 files changed, 17 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 316349f6cea5..a986b07ea845 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,7 +98,6 @@ #include #include #include -#include #include #include #include @@ -8605,10 +8604,6 @@ int dev_get_phys_port_name(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; int err; - err = dsa_ndo_get_phys_port_name(dev, name, len); - if (err == 0 || err != -EOPNOTSUPP) - return err; - if (ops->ndo_get_phys_port_name) { err = ops->ndo_get_phys_port_name(dev, name, len); if (err != -EOPNOTSUPP) diff --git a/net/dsa/master.c b/net/dsa/master.c index 0a90911ae31b..61615ebc70e9 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -186,17 +186,6 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -static int dsa_master_get_phys_port_name(struct net_device *dev, - char *name, size_t len) -{ - struct dsa_port *cpu_dp = dev->dsa_ptr; - - if (snprintf(name, len, "p%d", cpu_dp->index) >= len) - return -EINVAL; - - return 0; -} - static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -228,7 +217,6 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static const struct dsa_netdevice_ops dsa_netdev_ops = { .ndo_do_ioctl = dsa_master_ioctl, - .ndo_get_phys_port_name = dsa_master_get_phys_port_name, }; static int dsa_master_ethtool_setup(struct net_device *dev) -- cgit From 0cb09aff9d49d92305c3969fc84b785117412968 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 23 Jul 2020 01:03:00 +0300 Subject: net/flow_dissector: add packet hash dissection Retreive a hash value from the SKB and store it in the dissector key for future matching. Signed-off-by: Ariel Levkovich Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 142a8824f0a8..29806eb765cf 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -383,6 +383,23 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); +void skb_flow_dissect_hash(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_hash *key; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_HASH)) + return; + + key = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_HASH, + target_container); + + key->hash = skb_get_hash_raw(skb); +} +EXPORT_SYMBOL(skb_flow_dissect_hash); + static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, -- cgit From 5923b8f7fa218a9bccd730c0a9692635eb2fc740 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Thu, 23 Jul 2020 01:03:01 +0300 Subject: net/sched: cls_flower: Add hash info to flow classification Adding new cls flower keys for hash value and hash mask and dissect the hash info from the skb into the flow key towards flow classication. Signed-off-by: Ariel Levkovich Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index acd8e05c2ba5..a4f7ef1de7e7 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -64,6 +64,7 @@ struct fl_flow_key { }; } tp_range; struct flow_dissector_key_ct ct; + struct flow_dissector_key_hash hash; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -318,6 +319,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, ARRAY_SIZE(fl_ct_info_to_flower_map)); + skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); f = fl_mask_lookup(mask, &skb_key); @@ -695,6 +697,9 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY, .len = 128 / BITS_PER_BYTE }, [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 }, + }; static const struct nla_policy @@ -1626,6 +1631,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip); + fl_set_key_val(tb, &key->hash.hash, TCA_FLOWER_KEY_HASH, + &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK, + sizeof(key->hash.hash)); + if (tb[TCA_FLOWER_KEY_ENC_OPTS]) { ret = fl_set_enc_opt(tb, key, mask, extack); if (ret) @@ -1740,6 +1749,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CT, ct); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_HASH, hash); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2960,6 +2971,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) goto nla_put_failure; + if (fl_dump_key_val(skb, &key->hash.hash, TCA_FLOWER_KEY_HASH, + &mask->hash.hash, TCA_FLOWER_KEY_HASH_MASK, + sizeof(key->hash.hash))) + goto nla_put_failure; + return 0; nla_put_failure: -- cgit From e024e008186bf9f4109c86b66dd60d0f926bc1fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:43 +0200 Subject: bpfilter: fix up a sparse annotation The __user doesn't make sense when casting to an integer type, just switch to a uintptr_t cast which also removes the need for the __force. Signed-off-by: Christoph Hellwig Reviewed-by: Luc Van Oostenryck Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 2c31e82cb953..3bac5820062a 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -44,7 +44,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.is_set = is_set; req.pid = current->pid; req.cmd = optname; - req.addr = (long __force __user)optval; + req.addr = (uintptr_t)optval; req.len = optlen; if (!bpfilter_ops.info.tgid) goto out; -- cgit From c9ffebdde8deccf9ea3a7a97bcd84de0a35ddad7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:44 +0200 Subject: net/bpfilter: split __bpfilter_process_sockopt Split __bpfilter_process_sockopt into a low-level send request routine and the actual setsockopt hook to split the init time ping from the actual setsockopt processing. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_kern.c | 51 +++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 3bac5820062a..78d561f2c54d 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -31,48 +31,51 @@ static void __stop_umh(void) shutdown_umh(); } -static int __bpfilter_process_sockopt(struct sock *sk, int optname, - char __user *optval, - unsigned int optlen, bool is_set) +static int bpfilter_send_req(struct mbox_request *req) { - struct mbox_request req; struct mbox_reply reply; loff_t pos; ssize_t n; - int ret = -EFAULT; - req.is_set = is_set; - req.pid = current->pid; - req.cmd = optname; - req.addr = (uintptr_t)optval; - req.len = optlen; if (!bpfilter_ops.info.tgid) - goto out; + return -EFAULT; pos = 0; - n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req), &pos); - if (n != sizeof(req)) { + if (n != sizeof(*req)) { pr_err("write fail %zd\n", n); - __stop_umh(); - ret = -EFAULT; - goto out; + goto stop; } pos = 0; n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), &pos); if (n != sizeof(reply)) { pr_err("read fail %zd\n", n); - __stop_umh(); - ret = -EFAULT; - goto out; + goto stop; } - ret = reply.status; -out: - return ret; + return reply.status; +stop: + __stop_umh(); + return -EFAULT; +} + +static int bpfilter_process_sockopt(struct sock *sk, int optname, + char __user *optval, unsigned int optlen, + bool is_set) +{ + struct mbox_request req = { + .is_set = is_set, + .pid = current->pid, + .cmd = optname, + .addr = (uintptr_t)optval, + .len = optlen, + }; + return bpfilter_send_req(&req); } static int start_umh(void) { + struct mbox_request req = { .pid = current->pid }; int err; /* fork usermode process */ @@ -82,7 +85,7 @@ static int start_umh(void) pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); /* health check that usermode process started correctly */ - if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { + if (bpfilter_send_req(&req) != 0) { shutdown_umh(); return -EFAULT; } @@ -103,7 +106,7 @@ static int __init load_umh(void) mutex_lock(&bpfilter_ops.lock); err = start_umh(); if (!err && IS_ENABLED(CONFIG_INET)) { - bpfilter_ops.sockopt = &__bpfilter_process_sockopt; + bpfilter_ops.sockopt = &bpfilter_process_sockopt; bpfilter_ops.start = &start_umh; } mutex_unlock(&bpfilter_ops.lock); -- cgit From d200cf624c9247ab52b67d34d9e198262a23df31 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:45 +0200 Subject: bpfilter: reject kernel addresses The bpfilter user mode helper processes the optval address using process_vm_readv. Don't send it kernel addresses fed under set_fs(KERNEL_DS) as that won't work. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_kern.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 78d561f2c54d..00540457e5f4 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -70,6 +70,10 @@ static int bpfilter_process_sockopt(struct sock *sk, int optname, .addr = (uintptr_t)optval, .len = optlen, }; + if (uaccess_kernel()) { + pr_err("kernel access not supported\n"); + return -EFAULT; + } return bpfilter_send_req(&req); } -- cgit From b1ea9ff6aff2deae84eccaf0a07cd14912669680 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:47 +0200 Subject: net: switch copy_bpf_fprog_from_user to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/filter.c | 6 +++--- net/core/sock.c | 6 ++++-- net/packet/af_packet.c | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 3fa16b8c0d61..29e3455122f7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -77,14 +77,14 @@ #include #include -int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len) +int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { if (in_compat_syscall()) { struct compat_sock_fprog f32; if (len != sizeof(f32)) return -EINVAL; - if (copy_from_user(&f32, src, sizeof(f32))) + if (copy_from_sockptr(&f32, src, sizeof(f32))) return -EFAULT; memset(dst, 0, sizeof(*dst)); dst->len = f32.len; @@ -92,7 +92,7 @@ int copy_bpf_fprog_from_user(struct sock_fprog *dst, void __user *src, int len) } else { if (len != sizeof(*dst)) return -EINVAL; - if (copy_from_user(dst, src, sizeof(*dst))) + if (copy_from_sockptr(dst, src, sizeof(*dst))) return -EFAULT; } diff --git a/net/core/sock.c b/net/core/sock.c index 6da54eac2b34..71fc7e4ddd06 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1063,7 +1063,8 @@ set_sndbuf: case SO_ATTACH_FILTER: { struct sock_fprog fprog; - ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + ret = copy_bpf_fprog_from_user(&fprog, USER_SOCKPTR(optval), + optlen); if (!ret) ret = sk_attach_filter(&fprog, sk); break; @@ -1084,7 +1085,8 @@ set_sndbuf: case SO_ATTACH_REUSEPORT_CBPF: { struct sock_fprog fprog; - ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); + ret = copy_bpf_fprog_from_user(&fprog, USER_SOCKPTR(optval), + optlen); if (!ret) ret = sk_reuseport_attach_filter(&fprog, sk); break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c240fb5de3f0..d8d4f78f78e4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1536,7 +1536,7 @@ static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) } } -static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, +static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; @@ -1584,7 +1584,7 @@ static int fanout_set_data(struct packet_sock *po, char __user *data, { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: - return fanout_set_data_cbpf(po, data, len); + return fanout_set_data_cbpf(po, USER_SOCKPTR(data), len); case PACKET_FANOUT_EBPF: return fanout_set_data_ebpf(po, data, len); default: -- cgit From 5790642b4748acbb5560f7a6fcb19f572297cf63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:48 +0200 Subject: net: switch sock_setbindtodevice to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/sock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 71fc7e4ddd06..5b55bc9397f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -609,8 +609,7 @@ int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) } EXPORT_SYMBOL(sock_bindtoindex); -static int sock_setbindtodevice(struct sock *sk, char __user *optval, - int optlen) +static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -632,7 +631,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, memset(devname, 0, sizeof(devname)); ret = -EFAULT; - if (copy_from_user(devname, optval, optlen)) + if (copy_from_sockptr(devname, optval, optlen)) goto out; index = 0; @@ -840,7 +839,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ if (optname == SO_BINDTODEVICE) - return sock_setbindtodevice(sk, optval, optlen); + return sock_setbindtodevice(sk, USER_SOCKPTR(optval), optlen); if (optlen < sizeof(int)) return -EINVAL; -- cgit From c34645ac25484968ddace9a6b140b70e72dc49b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:49 +0200 Subject: net: switch sock_set_timeout to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/sock.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 5b55bc9397f2..8b9eddaff868 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -361,7 +361,8 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval) return sizeof(tv); } -static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval) +static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, + bool old_timeval) { struct __kernel_sock_timeval tv; @@ -371,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool if (optlen < sizeof(tv32)) return -EINVAL; - if (copy_from_user(&tv32, optval, sizeof(tv32))) + if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) return -EFAULT; tv.tv_sec = tv32.tv_sec; tv.tv_usec = tv32.tv_usec; @@ -380,14 +381,14 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool if (optlen < sizeof(old_tv)) return -EINVAL; - if (copy_from_user(&old_tv, optval, sizeof(old_tv))) + if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) return -EFAULT; tv.tv_sec = old_tv.tv_sec; tv.tv_usec = old_tv.tv_usec; } else { if (optlen < sizeof(tv)) return -EINVAL; - if (copy_from_user(&tv, optval, sizeof(tv))) + if (copy_from_sockptr(&tv, optval, sizeof(tv))) return -EFAULT; } if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) @@ -1051,12 +1052,14 @@ set_sndbuf: case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD); + ret = sock_set_timeout(&sk->sk_rcvtimeo, USER_SOCKPTR(optval), + optlen, optname == SO_RCVTIMEO_OLD); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); + ret = sock_set_timeout(&sk->sk_sndtimeo, USER_SOCKPTR(optval), + optlen, optname == SO_SNDTIMEO_OLD); break; case SO_ATTACH_FILTER: { -- cgit From c8c1bbb6eb498109286739f8b6090e99313dd104 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:50 +0200 Subject: net: switch sock_set_timeout to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Acked-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/core/sock.c | 26 ++++++++++++-------------- net/mptcp/protocol.c | 6 ++++-- net/socket.c | 3 ++- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8b9eddaff868..1444d7d53ba2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -826,7 +826,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf); */ int sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock_txtime sk_txtime; struct sock *sk = sock->sk; @@ -840,12 +840,12 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ if (optname == SO_BINDTODEVICE) - return sock_setbindtodevice(sk, USER_SOCKPTR(optval), optlen); + return sock_setbindtodevice(sk, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; valbool = val ? 1 : 0; @@ -958,7 +958,7 @@ set_sndbuf: ret = -EINVAL; /* 1003.1g */ break; } - if (copy_from_user(&ling, optval, sizeof(ling))) { + if (copy_from_sockptr(&ling, optval, sizeof(ling))) { ret = -EFAULT; break; } @@ -1052,21 +1052,20 @@ set_sndbuf: case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_rcvtimeo, USER_SOCKPTR(optval), + ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: - ret = sock_set_timeout(&sk->sk_sndtimeo, USER_SOCKPTR(optval), + ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); break; case SO_ATTACH_FILTER: { struct sock_fprog fprog; - ret = copy_bpf_fprog_from_user(&fprog, USER_SOCKPTR(optval), - optlen); + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); if (!ret) ret = sk_attach_filter(&fprog, sk); break; @@ -1077,7 +1076,7 @@ set_sndbuf: u32 ufd; ret = -EFAULT; - if (copy_from_user(&ufd, optval, sizeof(ufd))) + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_attach_bpf(ufd, sk); @@ -1087,8 +1086,7 @@ set_sndbuf: case SO_ATTACH_REUSEPORT_CBPF: { struct sock_fprog fprog; - ret = copy_bpf_fprog_from_user(&fprog, USER_SOCKPTR(optval), - optlen); + ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); if (!ret) ret = sk_reuseport_attach_filter(&fprog, sk); break; @@ -1099,7 +1097,7 @@ set_sndbuf: u32 ufd; ret = -EFAULT; - if (copy_from_user(&ufd, optval, sizeof(ufd))) + if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) break; ret = sk_reuseport_attach_bpf(ufd, sk); @@ -1179,7 +1177,7 @@ set_sndbuf: if (sizeof(ulval) != sizeof(val) && optlen >= sizeof(ulval) && - get_user(ulval, (unsigned long __user *)optval)) { + copy_from_sockptr(&ulval, optval, sizeof(ulval))) { ret = -EFAULT; break; } @@ -1222,7 +1220,7 @@ set_sndbuf: if (optlen != sizeof(struct sock_txtime)) { ret = -EINVAL; break; - } else if (copy_from_user(&sk_txtime, optval, + } else if (copy_from_sockptr(&sk_txtime, optval, sizeof(struct sock_txtime))) { ret = -EFAULT; break; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 979dfcd2aa14..7246847efa90 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1648,7 +1648,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return -EINVAL; } - ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); + ret = sock_setsockopt(ssock, SOL_SOCKET, optname, + USER_SOCKPTR(optval), optlen); if (ret == 0) { if (optname == SO_REUSEPORT) sk->sk_reuseport = ssock->sk->sk_reuseport; @@ -1659,7 +1660,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return ret; } - return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); + return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, + USER_SOCKPTR(optval), optlen); } static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, diff --git a/net/socket.c b/net/socket.c index 93846568c2fb..c97f83d879ae 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2130,7 +2130,8 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval, } if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) - err = sock_setsockopt(sock, level, optname, optval, optlen); + err = sock_setsockopt(sock, level, optname, + USER_SOCKPTR(optval), optlen); else if (unlikely(!sock->ops->setsockopt)) err = -EOPNOTSUPP; else -- cgit From c6d1b26a8fd4940fe5d4199311838f6c2aef0174 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:51 +0200 Subject: net/xfrm: switch xfrm_user_policy to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 3 ++- net/ipv6/ipv6_sockglue.c | 3 ++- net/xfrm/xfrm_state.c | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index a5ea02d7a183..da933f99b5d5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1322,7 +1322,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EPERM; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) break; - err = xfrm_user_policy(sk, optname, optval, optlen); + err = xfrm_user_policy(sk, optname, USER_SOCKPTR(optval), + optlen); break; case IP_TRANSPARENT: diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index add8f7912299..56a74707c617 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -935,7 +935,8 @@ done: retv = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; - retv = xfrm_user_policy(sk, optname, optval, optlen); + retv = xfrm_user_policy(sk, optname, USER_SOCKPTR(optval), + optlen); break; case IPV6_ADDR_PREFERENCES: diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8be2d926acc2..69520ad3d83b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2264,7 +2264,7 @@ static bool km_is_alive(const struct km_event *c) return is_alive; } -int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) +int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) { int err; u8 *data; @@ -2274,7 +2274,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (in_compat_syscall()) return -EOPNOTSUPP; - if (!optval && !optlen) { + if (sockptr_is_null(optval) && !optlen) { xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); __sk_dst_reset(sk); @@ -2284,7 +2284,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (optlen <= 0 || optlen > PAGE_SIZE) return -EMSGSIZE; - data = memdup_user(optval, optlen); + data = memdup_sockptr(optval, optlen); if (IS_ERR(data)) return PTR_ERR(data); -- cgit From 7e4b9dbabb2ad0d108d12939e34d4d330104ac6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:52 +0200 Subject: netfilter: remove the unused user argument to do_update_counters Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index fe13108af1f5..12f8929667bf 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1242,9 +1242,8 @@ void ebt_unregister_table(struct net *net, struct ebt_table *table, /* userspace just supplied us with counters */ static int do_update_counters(struct net *net, const char *name, - struct ebt_counter __user *counters, - unsigned int num_counters, - const void __user *user, unsigned int len) + struct ebt_counter __user *counters, + unsigned int num_counters, unsigned int len) { int i, ret; struct ebt_counter *tmp; @@ -1299,7 +1298,7 @@ static int update_counters(struct net *net, const void __user *user, return -EINVAL; return do_update_counters(net, hlp.name, hlp.counters, - hlp.num_counters, user, len); + hlp.num_counters, len); } static inline int ebt_obj_to_user(char __user *um, const char *_name, @@ -2231,7 +2230,7 @@ static int compat_update_counters(struct net *net, void __user *user, return update_counters(net, user, len); return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), - hlp.num_counters, user, len); + hlp.num_counters, len); } static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, -- cgit From ab214d1bf8c7ef1ed7af803a72491cb29edfa8f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:53 +0200 Subject: netfilter: switch xt_copy_counters to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 7 +++---- net/ipv4/netfilter/ip_tables.c | 7 +++---- net/ipv6/netfilter/ip6_tables.c | 6 +++--- net/netfilter/x_tables.c | 20 ++++++++++---------- 4 files changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2c8a4dad39d7..6d24b686c7f0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -996,8 +996,7 @@ static int do_replace(struct net *net, const void __user *user, return ret; } -static int do_add_counters(struct net *net, const void __user *user, - unsigned int len) +static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1008,7 +1007,7 @@ static int do_add_counters(struct net *net, const void __user *user, struct arpt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp); + paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); @@ -1420,7 +1419,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len); + ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); break; default: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 161901dd1cae..4697d09c98dc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1151,8 +1151,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) } static int -do_add_counters(struct net *net, const void __user *user, - unsigned int len) +do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1163,7 +1162,7 @@ do_add_counters(struct net *net, const void __user *user, struct ipt_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp); + paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); @@ -1629,7 +1628,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len); + ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); break; default: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index fd1f8f931231..a787aba30e2d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1168,7 +1168,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) } static int -do_add_counters(struct net *net, const void __user *user, unsigned int len) +do_add_counters(struct net *net, sockptr_t arg, unsigned int len) { unsigned int i; struct xt_counters_info tmp; @@ -1179,7 +1179,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len) struct ip6t_entry *iter; unsigned int addend; - paddc = xt_copy_counters_from_user(user, len, &tmp); + paddc = xt_copy_counters(arg, len, &tmp); if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); @@ -1637,7 +1637,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), user, len); + ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); break; default: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 32bab45af7e4..b97eb4b538fd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1028,9 +1028,9 @@ int xt_check_target(struct xt_tgchk_param *par, EXPORT_SYMBOL_GPL(xt_check_target); /** - * xt_copy_counters_from_user - copy counters and metadata from userspace + * xt_copy_counters - copy counters and metadata from a sockptr_t * - * @user: src pointer to userspace memory + * @arg: src sockptr * @len: alleged size of userspace memory * @info: where to store the xt_counters_info metadata * @@ -1047,8 +1047,8 @@ EXPORT_SYMBOL_GPL(xt_check_target); * Return: returns pointer that caller has to test via IS_ERR(). * If IS_ERR is false, caller has to vfree the pointer. */ -void *xt_copy_counters_from_user(const void __user *user, unsigned int len, - struct xt_counters_info *info) +void *xt_copy_counters(sockptr_t arg, unsigned int len, + struct xt_counters_info *info) { void *mem; u64 size; @@ -1062,12 +1062,12 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, return ERR_PTR(-EINVAL); len -= sizeof(compat_tmp); - if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) + if (copy_from_sockptr(&compat_tmp, arg, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; - user += sizeof(compat_tmp); + sockptr_advance(arg, sizeof(compat_tmp)); } else #endif { @@ -1075,10 +1075,10 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, return ERR_PTR(-EINVAL); len -= sizeof(*info); - if (copy_from_user(info, user, sizeof(*info)) != 0) + if (copy_from_sockptr(info, arg, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - user += sizeof(*info); + sockptr_advance(arg, sizeof(*info)); } info->name[sizeof(info->name) - 1] = '\0'; @@ -1092,13 +1092,13 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (!mem) return ERR_PTR(-ENOMEM); - if (copy_from_user(mem, user, len) == 0) + if (copy_from_sockptr(mem, arg, len) == 0) return mem; vfree(mem); return ERR_PTR(-EFAULT); } -EXPORT_SYMBOL_GPL(xt_copy_counters_from_user); +EXPORT_SYMBOL_GPL(xt_copy_counters); #ifdef CONFIG_COMPAT int xt_compat_target_offset(const struct xt_target *target) -- cgit From c2f12630c60ff33a9cafd221646053fc10ec59b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:54 +0200 Subject: netfilter: switch nf_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 37 +++++++++++++++++-------------------- net/decnet/af_decnet.c | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/netfilter/arp_tables.c | 28 ++++++++++++++-------------- net/ipv4/netfilter/ip_tables.c | 24 ++++++++++++------------ net/ipv6/ipv6_sockglue.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 24 ++++++++++++------------ net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/nf_sockopt.c | 2 +- 9 files changed, 64 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 12f8929667bf..d35173e803d3 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1063,14 +1063,13 @@ free_counterstmp: } /* replace the table */ -static int do_replace(struct net *net, const void __user *user, - unsigned int len) +static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret, countersize; struct ebt_table_info *newinfo; struct ebt_replace tmp; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; if (len != sizeof(tmp) + tmp.entries_size) @@ -1286,12 +1285,11 @@ free_tmp: return ret; } -static int update_counters(struct net *net, const void __user *user, - unsigned int len) +static int update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct ebt_replace hlp; - if (copy_from_user(&hlp, user, sizeof(hlp))) + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) @@ -2079,7 +2077,7 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user, static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, - void __user *user, unsigned int len) + sockptr_t arg, unsigned int len) { struct compat_ebt_replace tmp; int i; @@ -2087,7 +2085,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, if (len < sizeof(tmp)) return -EINVAL; - if (copy_from_user(&tmp, user, sizeof(tmp))) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp))) return -EFAULT; if (len != sizeof(tmp) + tmp.entries_size) @@ -2114,8 +2112,7 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, return 0; } -static int compat_do_replace(struct net *net, void __user *user, - unsigned int len) +static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret, i, countersize, size64; struct ebt_table_info *newinfo; @@ -2123,10 +2120,10 @@ static int compat_do_replace(struct net *net, void __user *user, struct ebt_entries_buf_state state; void *entries_tmp; - ret = compat_copy_ebt_replace_from_user(&tmp, user, len); + ret = compat_copy_ebt_replace_from_user(&tmp, arg, len); if (ret) { /* try real handler in case userland supplied needed padding */ - if (ret == -EINVAL && do_replace(net, user, len) == 0) + if (ret == -EINVAL && do_replace(net, arg, len) == 0) ret = 0; return ret; } @@ -2217,17 +2214,17 @@ out_unlock: goto free_entries; } -static int compat_update_counters(struct net *net, void __user *user, +static int compat_update_counters(struct net *net, sockptr_t arg, unsigned int len) { struct compat_ebt_replace hlp; - if (copy_from_user(&hlp, user, sizeof(hlp))) + if (copy_from_sockptr(&hlp, arg, sizeof(hlp))) return -EFAULT; /* try real handler in case userland supplied needed padding */ if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) - return update_counters(net, user, len); + return update_counters(net, arg, len); return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), hlp.num_counters, len); @@ -2368,7 +2365,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, +static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { struct net *net = sock_net(sk); @@ -2381,18 +2378,18 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, case EBT_SO_SET_ENTRIES: #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = compat_do_replace(net, user, len); + ret = compat_do_replace(net, arg, len); else #endif - ret = do_replace(net, user, len); + ret = do_replace(net, arg, len); break; case EBT_SO_SET_COUNTERS: #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = compat_update_counters(net, user, len); + ret = compat_update_counters(net, arg, len); else #endif - ret = update_counters(net, user, len); + ret = update_counters(net, arg, len); break; default: ret = -EINVAL; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7d7ae2dd69b8..7d51ab608fb3 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1332,7 +1332,8 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != DSO_LINKINFO && optname != DSO_STREAM && optname != DSO_SEQPACKET) - err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); + err = nf_setsockopt(sk, PF_DECnet, optname, + USER_SOCKPTR(optval), optlen); #endif return err; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index da933f99b5d5..42befbf12846 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1422,7 +1422,8 @@ int ip_setsockopt(struct sock *sk, int level, optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && !ip_mroute_opt(optname)) - err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); + err = nf_setsockopt(sk, PF_INET, optname, USER_SOCKPTR(optval), + optlen); #endif return err; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6d24b686c7f0..f5b26ef17820 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only + /* * Packet matching code for ARP packets. * @@ -947,8 +947,7 @@ static int __do_replace(struct net *net, const char *name, return ret; } -static int do_replace(struct net *net, const void __user *user, - unsigned int len) +static int do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct arpt_replace tmp; @@ -956,7 +955,7 @@ static int do_replace(struct net *net, const void __user *user, void *loc_cpu_entry; struct arpt_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -972,8 +971,8 @@ static int do_replace(struct net *net, const void __user *user, return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1244,8 +1243,7 @@ out_unlock: return ret; } -static int compat_do_replace(struct net *net, void __user *user, - unsigned int len) +static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_arpt_replace tmp; @@ -1253,7 +1251,7 @@ static int compat_do_replace(struct net *net, void __user *user, void *loc_cpu_entry; struct arpt_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -1269,7 +1267,8 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1401,7 +1400,8 @@ static int compat_get_entries(struct net *net, } #endif -static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, + unsigned int len) { int ret; @@ -1412,14 +1412,14 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned case ARPT_SO_SET_REPLACE: #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(sock_net(sk), arg, len); else #endif - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(sock_net(sk), arg, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); + ret = do_add_counters(sock_net(sk), arg, len); break; default: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4697d09c98dc..f2a9680303d8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1102,7 +1102,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, const void __user *user, unsigned int len) +do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1110,7 +1110,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -1126,8 +1126,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1484,7 +1484,7 @@ out_unlock: } static int -compat_do_replace(struct net *net, void __user *user, unsigned int len) +compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ipt_replace tmp; @@ -1492,7 +1492,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) void *loc_cpu_entry; struct ipt_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -1508,8 +1508,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1610,7 +1610,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, #endif static int -do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; @@ -1621,14 +1621,14 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) case IPT_SO_SET_REPLACE: #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(sock_net(sk), arg, len); else #endif - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(sock_net(sk), arg, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); + ret = do_add_counters(sock_net(sk), arg, len); break; default: diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56a74707c617..85892b35cff7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -996,7 +996,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && optname != IPV6_XFRM_POLICY) - err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); + err = nf_setsockopt(sk, PF_INET6, optname, USER_SOCKPTR(optval), + optlen); #endif return err; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a787aba30e2d..1d52957a413f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1119,7 +1119,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, } static int -do_replace(struct net *net, const void __user *user, unsigned int len) +do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct ip6t_replace tmp; @@ -1127,7 +1127,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -1143,8 +1143,8 @@ do_replace(struct net *net, const void __user *user, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1493,7 +1493,7 @@ out_unlock: } static int -compat_do_replace(struct net *net, void __user *user, unsigned int len) +compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ip6t_replace tmp; @@ -1501,7 +1501,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) void *loc_cpu_entry; struct ip6t_entry *iter; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ @@ -1517,8 +1517,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { + sockptr_advance(arg, sizeof(tmp)); + if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1619,7 +1619,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, #endif static int -do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; @@ -1630,14 +1630,14 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) case IP6T_SO_SET_REPLACE: #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = compat_do_replace(sock_net(sk), user, len); + ret = compat_do_replace(sock_net(sk), arg, len); else #endif - ret = do_replace(sock_net(sk), user, len); + ret = do_replace(sock_net(sk), arg, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sock_net(sk), USER_SOCKPTR(user), len); + ret = do_add_counters(sock_net(sk), arg, len); break; default: diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4af83f466dfc..bcac316addab 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2434,7 +2434,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, } static int -do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) { struct net *net = sock_net(sk); int ret; @@ -2458,7 +2458,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) return -EINVAL; } - if (copy_from_user(arg, user, len) != 0) + if (copy_from_sockptr(arg, ptr, len) != 0) return -EFAULT; /* Handle daemons since they have another lock */ diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 90469b1f628a..34afcd03b6f6 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -89,7 +89,7 @@ out: return ops; } -int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, sockptr_t opt, unsigned int len) { struct nf_sockopt_ops *ops; -- cgit From b03afaa82ece13b2a008f0e3a7127bead578e3e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:55 +0200 Subject: bpfilter: switch bpfilter_ip_set_sockopt to sockptr_t This is mostly to prepare for cleaning up the callers, as bpfilter by design can't handle kernel pointers. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_kern.c | 6 +++--- net/ipv4/bpfilter/sockopt.c | 8 ++++---- net/ipv4/ip_sockglue.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 00540457e5f4..f580c3344cb3 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -60,17 +60,17 @@ stop: } static int bpfilter_process_sockopt(struct sock *sk, int optname, - char __user *optval, unsigned int optlen, + sockptr_t optval, unsigned int optlen, bool is_set) { struct mbox_request req = { .is_set = is_set, .pid = current->pid, .cmd = optname, - .addr = (uintptr_t)optval, + .addr = (uintptr_t)optval.user, .len = optlen, }; - if (uaccess_kernel()) { + if (uaccess_kernel() || sockptr_is_kernel(optval)) { pr_err("kernel access not supported\n"); return -EFAULT; } diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 9063c6767d34..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -21,8 +21,7 @@ void bpfilter_umh_cleanup(struct umd_info *info) } EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); -static int bpfilter_mbox_request(struct sock *sk, int optname, - char __user *optval, +static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen, bool is_set) { int err; @@ -52,7 +51,7 @@ out: return err; } -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { return bpfilter_mbox_request(sk, optname, optval, optlen, true); @@ -66,7 +65,8 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, if (get_user(len, optlen)) return -EFAULT; - return bpfilter_mbox_request(sk, optname, optval, len, false); + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 42befbf12846..36f746e01741 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1414,7 +1414,8 @@ int ip_setsockopt(struct sock *sk, int level, #if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_SET_REPLACE && optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); + err = bpfilter_ip_set_sockopt(sk, optname, USER_SOCKPTR(optval), + optlen); #endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ -- cgit From 01ccb5b48f08f59ca3746d59221f4990aec1b194 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:56 +0200 Subject: net/ipv4: switch ip_mroute_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ipmr.c | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 36f746e01741..ac495b0cff8f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -925,7 +925,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optname == IP_ROUTER_ALERT) return ip_ra_control(sk, val ? 1 : 0, NULL); if (ip_mroute_opt(optname)) - return ip_mroute_setsockopt(sk, optname, optval, optlen); + return ip_mroute_setsockopt(sk, optname, USER_SOCKPTR(optval), + optlen); err = 0; if (needs_rtnl) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 678639c01e48..cdf3a40f9ff5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1341,7 +1341,7 @@ static void mrtsock_destruct(struct sock *sk) * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, +int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { struct net *net = sock_net(sk); @@ -1413,7 +1413,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (copy_from_user(&vif, optval, sizeof(vif))) { + if (copy_from_sockptr(&vif, optval, sizeof(vif))) { ret = -EFAULT; break; } @@ -1441,7 +1441,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (copy_from_user(&mfc, optval, sizeof(mfc))) { + if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } @@ -1459,7 +1459,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (get_user(val, (int __user *)optval)) { + if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } @@ -1471,7 +1471,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (get_user(val, (int __user *)optval)) { + if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } @@ -1486,7 +1486,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (get_user(val, (int __user *)optval)) { + if (copy_from_sockptr(&val, optval, sizeof(val))) { ret = -EFAULT; break; } @@ -1508,7 +1508,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, ret = -EINVAL; break; } - if (get_user(uval, (u32 __user *)optval)) { + if (copy_from_sockptr(&uval, optval, sizeof(uval))) { ret = -EFAULT; break; } -- cgit From de40a3e88311b6f0fc79b876a4768bf2d99f9aae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:57 +0200 Subject: net/ipv4: merge ip_options_get and ip_options_get_from_user Use the sockptr_t type to merge the versions. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 43 +++++++++++-------------------------------- net/ipv4/ip_sockglue.c | 7 ++++--- 2 files changed, 15 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ddaa01ec2bce..948747aac4e2 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -519,15 +519,20 @@ void ip_options_undo(struct ip_options *opt) } } -static struct ip_options_rcu *ip_options_get_alloc(const int optlen) +int ip_options_get(struct net *net, struct ip_options_rcu **optp, + sockptr_t data, int optlen) { - return kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), + struct ip_options_rcu *opt; + + opt = kzalloc(sizeof(struct ip_options_rcu) + ((optlen + 3) & ~3), GFP_KERNEL); -} + if (!opt) + return -ENOMEM; + if (optlen && copy_from_sockptr(opt->opt.__data, data, optlen)) { + kfree(opt); + return -EFAULT; + } -static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, - struct ip_options_rcu *opt, int optlen) -{ while (optlen & 3) opt->opt.__data[optlen++] = IPOPT_END; opt->opt.optlen = optlen; @@ -540,32 +545,6 @@ static int ip_options_get_finish(struct net *net, struct ip_options_rcu **optp, return 0; } -int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp, - unsigned char __user *data, int optlen) -{ - struct ip_options_rcu *opt = ip_options_get_alloc(optlen); - - if (!opt) - return -ENOMEM; - if (optlen && copy_from_user(opt->opt.__data, data, optlen)) { - kfree(opt); - return -EFAULT; - } - return ip_options_get_finish(net, optp, opt, optlen); -} - -int ip_options_get(struct net *net, struct ip_options_rcu **optp, - unsigned char *data, int optlen) -{ - struct ip_options_rcu *opt = ip_options_get_alloc(optlen); - - if (!opt) - return -ENOMEM; - if (optlen) - memcpy(opt->opt.__data, data, optlen); - return ip_options_get_finish(net, optp, opt, optlen); -} - void ip_forward_options(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ac495b0cff8f..b12f39b52008 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -280,7 +280,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, err = cmsg->cmsg_len - sizeof(struct cmsghdr); /* Our caller is responsible for freeing ipc->opt */ - err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), + err = ip_options_get(net, &ipc->opt, + KERNEL_SOCKPTR(CMSG_DATA(cmsg)), err < 40 ? err : 40); if (err) return err; @@ -940,8 +941,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen > 40) goto e_inval; - err = ip_options_get_from_user(sock_net(sk), &opt, - optval, optlen); + err = ip_options_get(sock_net(sk), &opt, USER_SOCKPTR(optval), + optlen); if (err) break; old = rcu_dereference_protected(inet->inet_opt, -- cgit From 89654c5fcd511d9fe26ec376f5e855581aa44802 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:58 +0200 Subject: net/ipv4: switch do_ip_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 68 ++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b12f39b52008..f7f1507b89fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -683,15 +683,15 @@ Eaddrnotavail: return -EADDRNOTAVAIL; } -static int copy_group_source_from_user(struct group_source_req *greqs, - void __user *optval, int optlen) +static int copy_group_source_from_sockptr(struct group_source_req *greqs, + sockptr_t optval, int optlen) { if (in_compat_syscall()) { struct compat_group_source_req gr32; if (optlen != sizeof(gr32)) return -EINVAL; - if (copy_from_user(&gr32, optval, sizeof(gr32))) + if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; greqs->gsr_interface = gr32.gsr_interface; greqs->gsr_group = gr32.gsr_group; @@ -699,7 +699,7 @@ static int copy_group_source_from_user(struct group_source_req *greqs, } else { if (optlen != sizeof(*greqs)) return -EINVAL; - if (copy_from_user(greqs, optval, sizeof(*greqs))) + if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) return -EFAULT; } @@ -707,14 +707,14 @@ static int copy_group_source_from_user(struct group_source_req *greqs, } static int do_mcast_group_source(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct group_source_req greqs; struct ip_mreq_source mreqs; struct sockaddr_in *psin; int omode, add, err; - err = copy_group_source_from_user(&greqs, optval, optlen); + err = copy_group_source_from_sockptr(&greqs, optval, optlen); if (err) return err; @@ -754,8 +754,7 @@ static int do_mcast_group_source(struct sock *sk, int optname, return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface); } -static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval, - int optlen) +static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { struct group_filter *gsf = NULL; int err; @@ -765,7 +764,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, void __user *optval, if (optlen > sysctl_optmem_max) return -ENOBUFS; - gsf = memdup_user(optval, optlen); + gsf = memdup_sockptr(optval, optlen); if (IS_ERR(gsf)) return PTR_ERR(gsf); @@ -786,7 +785,7 @@ out_free_gsf: return err; } -static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval, +static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { const int size0 = offsetof(struct compat_group_filter, gf_slist); @@ -806,7 +805,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, void __user *optval, gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ err = -EFAULT; - if (copy_from_user(gf32, optval, optlen)) + if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_gsf; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -831,7 +830,7 @@ out_free_gsf: } static int ip_mcast_join_leave(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct ip_mreqn mreq = { }; struct sockaddr_in *psin; @@ -839,7 +838,7 @@ static int ip_mcast_join_leave(struct sock *sk, int optname, if (optlen < sizeof(struct group_req)) return -EINVAL; - if (copy_from_user(&greq, optval, sizeof(greq))) + if (copy_from_sockptr(&greq, optval, sizeof(greq))) return -EFAULT; psin = (struct sockaddr_in *)&greq.gr_group; @@ -853,7 +852,7 @@ static int ip_mcast_join_leave(struct sock *sk, int optname, } static int compat_ip_mcast_join_leave(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct compat_group_req greq; struct ip_mreqn mreq = { }; @@ -861,7 +860,7 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, if (optlen < sizeof(struct compat_group_req)) return -EINVAL; - if (copy_from_user(&greq, optval, sizeof(greq))) + if (copy_from_sockptr(&greq, optval, sizeof(greq))) return -EFAULT; psin = (struct sockaddr_in *)&greq.gr_group; @@ -875,8 +874,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname, return ip_mc_leave_group(sk, &mreq); } -static int do_ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +static int do_ip_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); @@ -910,12 +909,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_RECVFRAGSIZE: case IP_RECVERR_RFC4884: if (optlen >= sizeof(int)) { - if (get_user(val, (int __user *) optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; } else if (optlen >= sizeof(char)) { unsigned char ucval; - if (get_user(ucval, (unsigned char __user *) optval)) + if (copy_from_sockptr(&ucval, optval, sizeof(ucval))) return -EFAULT; val = (int) ucval; } @@ -926,8 +925,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optname == IP_ROUTER_ALERT) return ip_ra_control(sk, val ? 1 : 0, NULL); if (ip_mroute_opt(optname)) - return ip_mroute_setsockopt(sk, optname, USER_SOCKPTR(optval), - optlen); + return ip_mroute_setsockopt(sk, optname, optval, optlen); err = 0; if (needs_rtnl) @@ -941,8 +939,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen > 40) goto e_inval; - err = ip_options_get(sock_net(sk), &opt, USER_SOCKPTR(optval), - optlen); + err = ip_options_get(sock_net(sk), &opt, optval, optlen); if (err) break; old = rcu_dereference_protected(inet->inet_opt, @@ -1140,17 +1137,17 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq, optval, sizeof(mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); if (optlen >= sizeof(struct ip_mreq)) { - if (copy_from_user(&mreq, optval, - sizeof(struct ip_mreq))) + if (copy_from_sockptr(&mreq, optval, + sizeof(struct ip_mreq))) break; } else if (optlen >= sizeof(struct in_addr)) { - if (copy_from_user(&mreq.imr_address, optval, - sizeof(struct in_addr))) + if (copy_from_sockptr(&mreq.imr_address, optval, + sizeof(struct in_addr))) break; } } @@ -1202,11 +1199,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { - if (copy_from_user(&mreq, optval, sizeof(mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(mreq))) break; } else { memset(&mreq, 0, sizeof(mreq)); - if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq))) + if (copy_from_sockptr(&mreq, optval, + sizeof(struct ip_mreq))) break; } @@ -1226,7 +1224,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -ENOBUFS; break; } - msf = memdup_user(optval, optlen); + msf = memdup_sockptr(optval, optlen); if (IS_ERR(msf)) { err = PTR_ERR(msf); break; @@ -1257,7 +1255,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (optlen != sizeof(struct ip_mreq_source)) goto e_inval; - if (copy_from_user(&mreqs, optval, sizeof(mreqs))) { + if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) { err = -EFAULT; break; } @@ -1324,8 +1322,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EPERM; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) break; - err = xfrm_user_policy(sk, optname, USER_SOCKPTR(optval), - optlen); + err = xfrm_user_policy(sk, optname, optval, optlen); break; case IP_TRANSPARENT: @@ -1412,7 +1409,8 @@ int ip_setsockopt(struct sock *sk, int level, if (level != SOL_IP) return -ENOPROTOOPT; - err = do_ip_setsockopt(sk, level, optname, optval, optlen); + err = do_ip_setsockopt(sk, level, optname, USER_SOCKPTR(optval), + optlen); #if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_SET_REPLACE && optname < BPFILTER_IPT_SET_MAX) -- cgit From b43c6153132c92745317f92174af84f57b160b76 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:08:59 +0200 Subject: net/ipv6: switch ip6_mroute_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 17 +++++++++-------- net/ipv6/ipv6_sockglue.c | 3 ++- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1f4d20e97c07..06b0d2c329b9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1629,7 +1629,8 @@ EXPORT_SYMBOL(mroute6_is_socket); * MOSPF/PIM router set up we can clean this up. */ -int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) +int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, + unsigned int optlen) { int ret, parent = 0; struct mif6ctl vif; @@ -1665,7 +1666,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ADD_MIF: if (optlen < sizeof(vif)) return -EINVAL; - if (copy_from_user(&vif, optval, sizeof(vif))) + if (copy_from_sockptr(&vif, optval, sizeof(vif))) return -EFAULT; if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; @@ -1678,7 +1679,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_DEL_MIF: if (optlen < sizeof(mifi_t)) return -EINVAL; - if (copy_from_user(&mifi, optval, sizeof(mifi_t))) + if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); ret = mif6_delete(mrt, mifi, 0, NULL); @@ -1697,7 +1698,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) return -EINVAL; - if (copy_from_user(&mfc, optval, sizeof(mfc))) + if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) return -EFAULT; if (parent == 0) parent = mfc.mf6cc_parent; @@ -1718,7 +1719,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (optlen != sizeof(flags)) return -EINVAL; - if (get_user(flags, (int __user *)optval)) + if (copy_from_sockptr(&flags, optval, sizeof(flags))) return -EFAULT; rtnl_lock(); mroute_clean_tables(mrt, flags); @@ -1735,7 +1736,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (optlen != sizeof(v)) return -EINVAL; - if (get_user(v, (int __user *)optval)) + if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; mrt->mroute_do_assert = v; return 0; @@ -1748,7 +1749,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (optlen != sizeof(v)) return -EINVAL; - if (get_user(v, (int __user *)optval)) + if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; v = !!v; rtnl_lock(); @@ -1769,7 +1770,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (optlen != sizeof(u32)) return -EINVAL; - if (get_user(v, (u32 __user *)optval)) + if (copy_from_sockptr(&v, optval, sizeof(v))) return -EFAULT; /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (v != RT_TABLE_DEFAULT && v >= 100000000) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 85892b35cff7..119dfaf5f4bb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -337,7 +337,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, valbool = (val != 0); if (ip6_mroute_opt(optname)) - return ip6_mroute_setsockopt(sk, optname, optval, optlen); + return ip6_mroute_setsockopt(sk, optname, USER_SOCKPTR(optval), + optlen); if (needs_rtnl) rtnl_lock(); -- cgit From ff6a4cf214effa0f600fff0eff70c60bcab94ecb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:00 +0200 Subject: net/ipv6: split up ipv6_flowlabel_opt Split ipv6_flowlabel_opt into a subfunction for each action and a small wrapper. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 309 +++++++++++++++++++++++++---------------------- 1 file changed, 166 insertions(+), 143 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index ce4fbba4acce..817b4f379009 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -533,187 +533,210 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return -ENOENT; } -int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) +#define socklist_dereference(__sflp) \ + rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock)) + +static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) { - int uninitialized_var(err); - struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_flowlabel_req freq; - struct ipv6_fl_socklist *sfl1 = NULL; - struct ipv6_fl_socklist *sfl; struct ipv6_fl_socklist __rcu **sflp; - struct ip6_flowlabel *fl, *fl1 = NULL; + struct ipv6_fl_socklist *sfl; + if (freq->flr_flags & IPV6_FL_F_REFLECT) { + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + if (!np->repflow) + return -ESRCH; + np->flow_label = 0; + np->repflow = 0; + return 0; + } - if (optlen < sizeof(freq)) - return -EINVAL; + spin_lock_bh(&ip6_sk_fl_lock); + for (sflp = &np->ipv6_fl_list; + (sfl = socklist_dereference(*sflp)) != NULL; + sflp = &sfl->next) { + if (sfl->fl->label == freq->flr_label) + goto found; + } + spin_unlock_bh(&ip6_sk_fl_lock); + return -ESRCH; +found: + if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK)) + np->flow_label &= ~IPV6_FLOWLABEL_MASK; + *sflp = sfl->next; + spin_unlock_bh(&ip6_sk_fl_lock); + fl_release(sfl->fl); + kfree_rcu(sfl, rcu); + return 0; +} - if (copy_from_user(&freq, optval, sizeof(freq))) - return -EFAULT; +static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); + struct ipv6_fl_socklist *sfl; + int err; - switch (freq.flr_action) { - case IPV6_FL_A_PUT: - if (freq.flr_flags & IPV6_FL_F_REFLECT) { - if (sk->sk_protocol != IPPROTO_TCP) - return -ENOPROTOOPT; - if (!np->repflow) - return -ESRCH; - np->flow_label = 0; - np->repflow = 0; - return 0; - } - spin_lock_bh(&ip6_sk_fl_lock); - for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference_protected(*sflp, - lockdep_is_held(&ip6_sk_fl_lock))) != NULL; - sflp = &sfl->next) { - if (sfl->fl->label == freq.flr_label) { - if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) - np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = sfl->next; - spin_unlock_bh(&ip6_sk_fl_lock); - fl_release(sfl->fl); - kfree_rcu(sfl, rcu); - return 0; - } + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { + if (sfl->fl->label == freq->flr_label) { + err = fl6_renew(sfl->fl, freq->flr_linger, + freq->flr_expires); + rcu_read_unlock_bh(); + return err; } - spin_unlock_bh(&ip6_sk_fl_lock); - return -ESRCH; + } + rcu_read_unlock_bh(); - case IPV6_FL_A_RENEW: - rcu_read_lock_bh(); - for_each_sk_fl_rcu(np, sfl) { - if (sfl->fl->label == freq.flr_label) { - err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); - rcu_read_unlock_bh(); - return err; - } - } - rcu_read_unlock_bh(); + if (freq->flr_share == IPV6_FL_S_NONE && + ns_capable(net->user_ns, CAP_NET_ADMIN)) { + struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label); - if (freq.flr_share == IPV6_FL_S_NONE && - ns_capable(net->user_ns, CAP_NET_ADMIN)) { - fl = fl_lookup(net, freq.flr_label); - if (fl) { - err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); - fl_release(fl); - return err; - } + if (fl) { + err = fl6_renew(fl, freq->flr_linger, + freq->flr_expires); + fl_release(fl); + return err; } - return -ESRCH; - - case IPV6_FL_A_GET: - if (freq.flr_flags & IPV6_FL_F_REFLECT) { - struct net *net = sock_net(sk); - if (net->ipv6.sysctl.flowlabel_consistency) { - net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); - return -EPERM; - } + } + return -ESRCH; +} - if (sk->sk_protocol != IPPROTO_TCP) - return -ENOPROTOOPT; +static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, + void __user *optval, int optlen) +{ + struct ipv6_fl_socklist *sfl, *sfl1 = NULL; + struct ip6_flowlabel *fl, *fl1 = NULL; + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); + int uninitialized_var(err); - np->repflow = 1; - return 0; + if (freq->flr_flags & IPV6_FL_F_REFLECT) { + if (net->ipv6.sysctl.flowlabel_consistency) { + net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); + return -EPERM; } - if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) - return -EINVAL; + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + np->repflow = 1; + return 0; + } - if (net->ipv6.sysctl.flowlabel_state_ranges && - (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) - return -ERANGE; + if (freq->flr_label & ~IPV6_FLOWLABEL_MASK) + return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; - fl = fl_create(net, sk, &freq, optval, optlen, &err); - if (!fl) - return err; - sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); + fl = fl_create(net, sk, freq, optval, optlen, &err); + if (!fl) + return err; - if (freq.flr_label) { - err = -EEXIST; - rcu_read_lock_bh(); - for_each_sk_fl_rcu(np, sfl) { - if (sfl->fl->label == freq.flr_label) { - if (freq.flr_flags&IPV6_FL_F_EXCL) { - rcu_read_unlock_bh(); - goto done; - } - fl1 = sfl->fl; - if (!atomic_inc_not_zero(&fl1->users)) - fl1 = NULL; - break; + sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); + + if (freq->flr_label) { + err = -EEXIST; + rcu_read_lock_bh(); + for_each_sk_fl_rcu(np, sfl) { + if (sfl->fl->label == freq->flr_label) { + if (freq->flr_flags & IPV6_FL_F_EXCL) { + rcu_read_unlock_bh(); + goto done; } + fl1 = sfl->fl; + if (!atomic_inc_not_zero(&fl1->users)) + fl1 = NULL; + break; } - rcu_read_unlock_bh(); + } + rcu_read_unlock_bh(); - if (!fl1) - fl1 = fl_lookup(net, freq.flr_label); - if (fl1) { + if (!fl1) + fl1 = fl_lookup(net, freq->flr_label); + if (fl1) { recheck: - err = -EEXIST; - if (freq.flr_flags&IPV6_FL_F_EXCL) - goto release; - err = -EPERM; - if (fl1->share == IPV6_FL_S_EXCL || - fl1->share != fl->share || - ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid != fl->owner.pid)) || - ((fl1->share == IPV6_FL_S_USER) && - !uid_eq(fl1->owner.uid, fl->owner.uid))) - goto release; - - err = -ENOMEM; - if (!sfl1) - goto release; - if (fl->linger > fl1->linger) - fl1->linger = fl->linger; - if ((long)(fl->expires - fl1->expires) > 0) - fl1->expires = fl->expires; - fl_link(np, sfl1, fl1); - fl_free(fl); - return 0; + err = -EEXIST; + if (freq->flr_flags&IPV6_FL_F_EXCL) + goto release; + err = -EPERM; + if (fl1->share == IPV6_FL_S_EXCL || + fl1->share != fl->share || + ((fl1->share == IPV6_FL_S_PROCESS) && + (fl1->owner.pid != fl->owner.pid)) || + ((fl1->share == IPV6_FL_S_USER) && + !uid_eq(fl1->owner.uid, fl->owner.uid))) + goto release; + + err = -ENOMEM; + if (!sfl1) + goto release; + if (fl->linger > fl1->linger) + fl1->linger = fl->linger; + if ((long)(fl->expires - fl1->expires) > 0) + fl1->expires = fl->expires; + fl_link(np, sfl1, fl1); + fl_free(fl); + return 0; release: - fl_release(fl1); - goto done; - } - } - err = -ENOENT; - if (!(freq.flr_flags&IPV6_FL_F_CREATE)) + fl_release(fl1); goto done; + } + } + err = -ENOENT; + if (!(freq->flr_flags & IPV6_FL_F_CREATE)) + goto done; - err = -ENOMEM; - if (!sfl1) - goto done; + err = -ENOMEM; + if (!sfl1) + goto done; - err = mem_check(sk); - if (err != 0) - goto done; + err = mem_check(sk); + if (err != 0) + goto done; - fl1 = fl_intern(net, fl, freq.flr_label); - if (fl1) - goto recheck; + fl1 = fl_intern(net, fl, freq->flr_label); + if (fl1) + goto recheck; - if (!freq.flr_label) { - if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, - &fl->label, sizeof(fl->label))) { - /* Intentionally ignore fault. */ - } + if (!freq->flr_label) { + if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, + &fl->label, sizeof(fl->label))) { + /* Intentionally ignore fault. */ } - - fl_link(np, sfl1, fl); - return 0; - - default: - return -EINVAL; } + fl_link(np, sfl1, fl); + return 0; done: fl_free(fl); kfree(sfl1); return err; } +int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) +{ + struct in6_flowlabel_req freq; + + if (optlen < sizeof(freq)) + return -EINVAL; + if (copy_from_user(&freq, optval, sizeof(freq))) + return -EFAULT; + + switch (freq.flr_action) { + case IPV6_FL_A_PUT: + return ipv6_flowlabel_put(sk, &freq); + case IPV6_FL_A_RENEW: + return ipv6_flowlabel_renew(sk, &freq); + case IPV6_FL_A_GET: + return ipv6_flowlabel_get(sk, &freq, optval, optlen); + default: + return -EINVAL; + } +} + #ifdef CONFIG_PROC_FS struct ip6fl_iter_state { -- cgit From 86298285c9ae3a41ce21c2d00ebdde51dd2abc73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:01 +0200 Subject: net/ipv6: switch ipv6_flowlabel_opt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Note that the get case is pretty weird in that it actually copies data back to userspace from setsockopt. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 16 +++++++++------- net/ipv6/ipv6_sockglue.c | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 817b4f379009..215b6f5e733e 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -371,7 +371,7 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo static struct ip6_flowlabel * fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, - char __user *optval, int optlen, int *err_p) + sockptr_t optval, int optlen, int *err_p) { struct ip6_flowlabel *fl = NULL; int olen; @@ -401,7 +401,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, memset(fl->opt, 0, sizeof(*fl->opt)); fl->opt->tot_len = sizeof(*fl->opt) + olen; err = -EFAULT; - if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen)) + sockptr_advance(optval, CMSG_ALIGN(sizeof(*freq))); + if (copy_from_sockptr(fl->opt + 1, optval, olen)) goto done; msg.msg_controllen = olen; @@ -604,7 +605,7 @@ static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq) } static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct ipv6_fl_socklist *sfl, *sfl1 = NULL; struct ip6_flowlabel *fl, *fl1 = NULL; @@ -702,8 +703,9 @@ release: goto recheck; if (!freq->flr_label) { - if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, - &fl->label, sizeof(fl->label))) { + sockptr_advance(optval, + offsetof(struct in6_flowlabel_req, flr_label)); + if (copy_to_sockptr(optval, &fl->label, sizeof(fl->label))) { /* Intentionally ignore fault. */ } } @@ -716,13 +718,13 @@ done: return err; } -int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) +int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen) { struct in6_flowlabel_req freq; if (optlen < sizeof(freq)) return -EINVAL; - if (copy_from_user(&freq, optval, sizeof(freq))) + if (copy_from_sockptr(&freq, optval, sizeof(freq))) return -EFAULT; switch (freq.flr_action) { diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 119dfaf5f4bb..3897fb55372d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -929,7 +929,7 @@ done: retv = 0; break; case IPV6_FLOWLABEL_MGR: - retv = ipv6_flowlabel_opt(sk, optval, optlen); + retv = ipv6_flowlabel_opt(sk, USER_SOCKPTR(optval), optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: -- cgit From b84d2b73af401dc00d9f03c5d78ccf71369a5707 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:02 +0200 Subject: net/ipv6: factor out a ipv6_set_opt_hdr helper Factour out a helper to set the IPv6 option headers from do_ipv6_setsockopt. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 150 +++++++++++++++++++++++------------------------ 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3897fb55372d..90442c8366df 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -315,6 +315,80 @@ static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr); } +static int ipv6_set_opt_hdr(struct sock *sk, int optname, void __user *optval, + int optlen) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_opt_hdr *new = NULL; + struct net *net = sock_net(sk); + struct ipv6_txoptions *opt; + int err; + + /* hop-by-hop / destination options are privileged option */ + if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) + return -EPERM; + + /* remove any sticky options header with a zero option + * length, per RFC3542. + */ + if (optlen > 0) { + if (!optval) + return -EINVAL; + if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || + optlen > 8 * 255) + return -EINVAL; + + new = memdup_user(optval, optlen); + if (IS_ERR(new)) + return PTR_ERR(new); + if (unlikely(ipv6_optlen(new) > optlen)) { + kfree(new); + return -EINVAL; + } + } + + opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); + opt = ipv6_renew_options(sk, opt, optname, new); + kfree(new); + if (IS_ERR(opt)) + return PTR_ERR(opt); + + /* routing header option needs extra check */ + err = -EINVAL; + if (optname == IPV6_RTHDR && opt && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = opt->srcrt; + switch (rthdr->type) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) + goto sticky_done; + break; +#endif + case IPV6_SRCRT_TYPE_4: + { + struct ipv6_sr_hdr *srh = + (struct ipv6_sr_hdr *)opt->srcrt; + + if (!seg6_validate_srh(srh, optlen, false)) + goto sticky_done; + break; + } + default: + goto sticky_done; + } + } + + err = 0; + opt = ipv6_update_options(sk, opt); +sticky_done: + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } + return err; +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -580,82 +654,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: - { - struct ipv6_txoptions *opt; - struct ipv6_opt_hdr *new = NULL; - - /* hop-by-hop / destination options are privileged option */ - retv = -EPERM; - if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) - break; - - /* remove any sticky options header with a zero option - * length, per RFC3542. - */ - if (optlen == 0) - optval = NULL; - else if (!optval) - goto e_inval; - else if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - else { - new = memdup_user(optval, optlen); - if (IS_ERR(new)) { - retv = PTR_ERR(new); - break; - } - if (unlikely(ipv6_optlen(new) > optlen)) { - kfree(new); - goto e_inval; - } - } - - opt = rcu_dereference_protected(np->opt, - lockdep_sock_is_held(sk)); - opt = ipv6_renew_options(sk, opt, optname, new); - kfree(new); - if (IS_ERR(opt)) { - retv = PTR_ERR(opt); - break; - } - - /* routing header option needs extra check */ - retv = -EINVAL; - if (optname == IPV6_RTHDR && opt && opt->srcrt) { - struct ipv6_rt_hdr *rthdr = opt->srcrt; - switch (rthdr->type) { -#if IS_ENABLED(CONFIG_IPV6_MIP6) - case IPV6_SRCRT_TYPE_2: - if (rthdr->hdrlen != 2 || - rthdr->segments_left != 1) - goto sticky_done; - - break; -#endif - case IPV6_SRCRT_TYPE_4: - { - struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) - opt->srcrt; - - if (!seg6_validate_srh(srh, optlen, false)) - goto sticky_done; - break; - } - default: - goto sticky_done; - } - } - - retv = 0; - opt = ipv6_update_options(sk, opt); -sticky_done: - if (opt) { - atomic_sub(opt->tot_len, &sk->sk_omem_alloc); - txopt_put(opt); - } + retv = ipv6_set_opt_hdr(sk, optname, optval, optlen); break; - } case IPV6_PKTINFO: { -- cgit From 894cfbc0cf3ea447e486fca99852893a155b6a81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:03 +0200 Subject: net/ipv6: switch do_ipv6_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 66 ++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 90442c8366df..dcd000a5a9b1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,15 +136,15 @@ static bool setsockopt_needs_rtnl(int optname) return false; } -static int copy_group_source_from_user(struct group_source_req *greqs, - void __user *optval, int optlen) +static int copy_group_source_from_sockptr(struct group_source_req *greqs, + sockptr_t optval, int optlen) { if (in_compat_syscall()) { struct compat_group_source_req gr32; if (optlen < sizeof(gr32)) return -EINVAL; - if (copy_from_user(&gr32, optval, sizeof(gr32))) + if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; greqs->gsr_interface = gr32.gsr_interface; greqs->gsr_group = gr32.gsr_group; @@ -152,7 +152,7 @@ static int copy_group_source_from_user(struct group_source_req *greqs, } else { if (optlen < sizeof(*greqs)) return -EINVAL; - if (copy_from_user(greqs, optval, sizeof(*greqs))) + if (copy_from_sockptr(greqs, optval, sizeof(*greqs))) return -EFAULT; } @@ -160,13 +160,13 @@ static int copy_group_source_from_user(struct group_source_req *greqs, } static int do_ipv6_mcast_group_source(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct group_source_req greqs; int omode, add; int ret; - ret = copy_group_source_from_user(&greqs, optval, optlen); + ret = copy_group_source_from_sockptr(&greqs, optval, optlen); if (ret) return ret; @@ -200,7 +200,7 @@ static int do_ipv6_mcast_group_source(struct sock *sk, int optname, return ip6_mc_source(add, omode, sk, &greqs); } -static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, +static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { struct group_filter *gsf; @@ -211,7 +211,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, if (optlen > sysctl_optmem_max) return -ENOBUFS; - gsf = memdup_user(optval, optlen); + gsf = memdup_sockptr(optval, optlen); if (IS_ERR(gsf)) return PTR_ERR(gsf); @@ -231,7 +231,7 @@ out_free_gsf: return ret; } -static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, +static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { const int size0 = offsetof(struct compat_group_filter, gf_slist); @@ -251,7 +251,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, void __user *optval, gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ ret = -EFAULT; - if (copy_from_user(gf32, optval, optlen)) + if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -276,14 +276,14 @@ out_free_p: } static int ipv6_mcast_join_leave(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct sockaddr_in6 *psin6; struct group_req greq; if (optlen < sizeof(greq)) return -EINVAL; - if (copy_from_user(&greq, optval, sizeof(greq))) + if (copy_from_sockptr(&greq, optval, sizeof(greq))) return -EFAULT; if (greq.gr_group.ss_family != AF_INET6) @@ -296,14 +296,14 @@ static int ipv6_mcast_join_leave(struct sock *sk, int optname, } static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, - void __user *optval, int optlen) + sockptr_t optval, int optlen) { struct compat_group_req gr32; struct sockaddr_in6 *psin6; if (optlen < sizeof(gr32)) return -EINVAL; - if (copy_from_user(&gr32, optval, sizeof(gr32))) + if (copy_from_sockptr(&gr32, optval, sizeof(gr32))) return -EFAULT; if (gr32.gr_group.ss_family != AF_INET6) @@ -315,7 +315,7 @@ static int compat_ipv6_mcast_join_leave(struct sock *sk, int optname, return ipv6_sock_mc_drop(sk, gr32.gr_interface, &psin6->sin6_addr); } -static int ipv6_set_opt_hdr(struct sock *sk, int optname, void __user *optval, +static int ipv6_set_opt_hdr(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -332,14 +332,14 @@ static int ipv6_set_opt_hdr(struct sock *sk, int optname, void __user *optval, * length, per RFC3542. */ if (optlen > 0) { - if (!optval) + if (sockptr_is_null(optval)) return -EINVAL; if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) return -EINVAL; - new = memdup_user(optval, optlen); + new = memdup_sockptr(optval, optlen); if (IS_ERR(new)) return PTR_ERR(new); if (unlikely(ipv6_optlen(new) > optlen)) { @@ -390,7 +390,7 @@ sticky_done: } static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); @@ -398,11 +398,11 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, int retv = -ENOPROTOOPT; bool needs_rtnl = setsockopt_needs_rtnl(optname); - if (!optval) + if (sockptr_is_null(optval)) val = 0; else { if (optlen >= sizeof(int)) { - if (get_user(val, (int __user *) optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; } else val = 0; @@ -411,8 +411,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, valbool = (val != 0); if (ip6_mroute_opt(optname)) - return ip6_mroute_setsockopt(sk, optname, USER_SOCKPTR(optval), - optlen); + return ip6_mroute_setsockopt(sk, optname, optval, optlen); if (needs_rtnl) rtnl_lock(); @@ -663,12 +662,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen == 0) goto e_inval; - else if (optlen < sizeof(struct in6_pktinfo) || !optval) + else if (optlen < sizeof(struct in6_pktinfo) || + sockptr_is_null(optval)) goto e_inval; - if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { - retv = -EFAULT; - break; + if (copy_from_sockptr(&pkt, optval, sizeof(pkt))) { + retv = -EFAULT; + break; } if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex)) goto e_inval; @@ -709,7 +709,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, refcount_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; - if (copy_from_user(opt+1, optval, optlen)) + if (copy_from_sockptr(opt + 1, optval, optlen)) goto done; msg.msg_controllen = optlen; @@ -831,7 +831,7 @@ done: break; retv = -EFAULT; - if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_ADD_MEMBERSHIP) @@ -849,7 +849,7 @@ done: goto e_inval; retv = -EFAULT; - if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) + if (copy_from_sockptr(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_JOIN_ANYCAST) @@ -929,15 +929,14 @@ done: retv = 0; break; case IPV6_FLOWLABEL_MGR: - retv = ipv6_flowlabel_opt(sk, USER_SOCKPTR(optval), optlen); + retv = ipv6_flowlabel_opt(sk, optval, optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; - retv = xfrm_user_policy(sk, optname, USER_SOCKPTR(optval), - optlen); + retv = xfrm_user_policy(sk, optname, optval, optlen); break; case IPV6_ADDR_PREFERENCES: @@ -992,7 +991,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); + err = do_ipv6_setsockopt(sk, level, optname, USER_SOCKPTR(optval), + optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && -- cgit From 91ac1ccaff597d06b1e16801e1a4c99b8a78dcbe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:04 +0200 Subject: net/udp: switch udp_lib_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/udp.c | 7 ++++--- net/ipv6/udp.c | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bb95cddcb040..c6cb2d09dbc7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2588,7 +2588,7 @@ void udp_destroy_sock(struct sock *sk) * Socket option code for UDP */ int udp_lib_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen, + sockptr_t optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); @@ -2599,7 +2599,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; valbool = val ? 1 : 0; @@ -2707,7 +2707,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, + return udp_lib_setsockopt(sk, level, optname, + USER_SOCKPTR(optval), optlen, udp_push_pending_frames); return ip_setsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c1143feb2bf..2df1e6c9d7cb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1622,7 +1622,8 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, + return udp_lib_setsockopt(sk, level, optname, + USER_SOCKPTR(optval), optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } -- cgit From d4c19c49142ddb2efcc34cff6379d03edb3553bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:05 +0200 Subject: net/tcp: switch ->md5_parse to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 58ede3d62b2e..49bf15c27dea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3249,7 +3249,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - err = tp->af_specific->md5_parse(sk, optname, optval, optlen); + err = tp->af_specific->md5_parse(sk, optname, + USER_SOCKPTR(optval), optlen); break; #endif case TCP_USER_TIMEOUT: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index daa39d33702b..f8913923a6c0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1195,7 +1195,7 @@ static void tcp_clear_md5_list(struct sock *sk) } static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, - char __user *optval, int optlen) + sockptr_t optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; @@ -1206,7 +1206,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, if (optlen < sizeof(cmd)) return -EINVAL; - if (copy_from_user(&cmd, optval, sizeof(cmd))) + if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) return -EFAULT; if (sin->sin_family != AF_INET) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c34b7834fd84..305870a72352 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -567,7 +567,7 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, } static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, - char __user *optval, int optlen) + sockptr_t optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -577,7 +577,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (optlen < sizeof(cmd)) return -EINVAL; - if (copy_from_user(&cmd, optval, sizeof(cmd))) + if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) return -EFAULT; if (sin6->sin6_family != AF_INET6) -- cgit From d38d2b00ba64b3f2f30d70a7929000606d2c4509 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:06 +0200 Subject: net/tcp: switch do_tcp_setsockopt to sockptr_t Pass a sockptr_t to prepare for set_fs-less handling of the kernel pointer from bpf-cgroup. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 49bf15c27dea..71cbc61c335f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2764,7 +2764,7 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) (sk->sk_state != TCP_LISTEN); } -static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) +static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len) { struct tcp_repair_window opt; @@ -2774,7 +2774,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l if (len != sizeof(opt)) return -EINVAL; - if (copy_from_user(&opt, optbuf, sizeof(opt))) + if (copy_from_sockptr(&opt, optbuf, sizeof(opt))) return -EFAULT; if (opt.max_window < opt.snd_wnd) @@ -2796,17 +2796,17 @@ static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int l return 0; } -static int tcp_repair_options_est(struct sock *sk, - struct tcp_repair_opt __user *optbuf, unsigned int len) +static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, + unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_repair_opt opt; while (len >= sizeof(opt)) { - if (copy_from_user(&opt, optbuf, sizeof(opt))) + if (copy_from_sockptr(&opt, optbuf, sizeof(opt))) return -EFAULT; - optbuf++; + sockptr_advance(optbuf, sizeof(opt)); len -= sizeof(opt); switch (opt.opt_code) { @@ -3020,8 +3020,8 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt); /* * Socket option code for TCP. */ -static int do_tcp_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +static int do_tcp_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -3037,7 +3037,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen < 1) return -EINVAL; - val = strncpy_from_user(name, optval, + val = strncpy_from_sockptr(name, optval, min_t(long, TCP_CA_NAME_MAX-1, optlen)); if (val < 0) return -EFAULT; @@ -3056,7 +3056,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen < 1) return -EINVAL; - val = strncpy_from_user(name, optval, + val = strncpy_from_sockptr(name, optval, min_t(long, TCP_ULP_NAME_MAX - 1, optlen)); if (val < 0) @@ -3079,7 +3079,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, optlen != TCP_FASTOPEN_KEY_BUF_LENGTH) return -EINVAL; - if (copy_from_user(key, optval, optlen)) + if (copy_from_sockptr(key, optval, optlen)) return -EFAULT; if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) @@ -3095,7 +3095,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3174,9 +3174,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (!tp->repair) err = -EINVAL; else if (sk->sk_state == TCP_ESTABLISHED) - err = tcp_repair_options_est(sk, - (struct tcp_repair_opt __user *)optval, - optlen); + err = tcp_repair_options_est(sk, optval, optlen); else err = -EPERM; break; @@ -3249,8 +3247,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: - err = tp->af_specific->md5_parse(sk, optname, - USER_SOCKPTR(optval), optlen); + err = tp->af_specific->md5_parse(sk, optname, optval, optlen); break; #endif case TCP_USER_TIMEOUT: @@ -3334,7 +3331,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); - return do_tcp_setsockopt(sk, level, optname, optval, optlen); + return do_tcp_setsockopt(sk, level, optname, USER_SOCKPTR(optval), + optlen); } EXPORT_SYMBOL(tcp_setsockopt); -- cgit From a7b75c5a8c41445f33efb663887ff5f5c3b4454b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:07 +0200 Subject: net: pass a sockptr_t into ->setsockopt Rework the remaining setsockopt code to pass a sockptr_t instead of a plain user pointer. This removes the last remaining set_fs(KERNEL_DS) outside of architecture specific code. Signed-off-by: Christoph Hellwig Acked-by: Stefan Schmidt [ieee802154] Acked-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/atm/common.c | 6 +++--- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 6 +++--- net/ax25/af_ax25.c | 6 +++--- net/bluetooth/hci_sock.c | 8 ++++---- net/bluetooth/l2cap_sock.c | 22 +++++++++++----------- net/bluetooth/rfcomm/sock.c | 12 +++++++----- net/bluetooth/sco.c | 6 +++--- net/caif/caif_socket.c | 8 ++++---- net/can/j1939/socket.c | 12 ++++++------ net/can/raw.c | 16 ++++++++-------- net/core/sock.c | 2 +- net/dccp/dccp.h | 2 +- net/dccp/proto.c | 20 ++++++++++---------- net/decnet/af_decnet.c | 16 +++++++++------- net/ieee802154/socket.c | 6 +++--- net/ipv4/ip_sockglue.c | 13 +++++-------- net/ipv4/raw.c | 8 ++++---- net/ipv4/tcp.c | 5 ++--- net/ipv4/udp.c | 6 +++--- net/ipv4/udp_impl.h | 4 ++-- net/ipv6/ipv6_sockglue.c | 10 ++++------ net/ipv6/raw.c | 10 +++++----- net/ipv6/udp.c | 6 +++--- net/ipv6/udp_impl.h | 4 ++-- net/iucv/af_iucv.c | 4 ++-- net/kcm/kcmsock.c | 6 +++--- net/l2tp/l2tp_ppp.c | 4 ++-- net/llc/af_llc.c | 4 ++-- net/mptcp/protocol.c | 12 +++++------- net/netlink/af_netlink.c | 4 ++-- net/netrom/af_netrom.c | 4 ++-- net/nfc/llcp_sock.c | 6 +++--- net/packet/af_packet.c | 39 ++++++++++++++++++++------------------- net/phonet/pep.c | 4 ++-- net/rds/af_rds.c | 30 ++++++++++++++---------------- net/rds/rdma.c | 14 ++++++-------- net/rds/rds.h | 6 +++--- net/rose/af_rose.c | 4 ++-- net/rxrpc/af_rxrpc.c | 8 ++++---- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/key.c | 9 ++++----- net/sctp/socket.c | 4 ++-- net/smc/af_smc.c | 4 ++-- net/socket.c | 23 +++++++---------------- net/tipc/socket.c | 8 ++++---- net/tls/tls_main.c | 17 +++++++++-------- net/vmw_vsock/af_vsock.c | 4 ++-- net/x25/af_x25.c | 4 ++-- net/xdp/xsk.c | 8 ++++---- 51 files changed, 218 insertions(+), 234 deletions(-) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 9b28f1fb3c69..84367b844b14 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -745,7 +745,7 @@ static int check_qos(const struct atm_qos *qos) } int vcc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct atm_vcc *vcc; unsigned long value; @@ -760,7 +760,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname, { struct atm_qos qos; - if (copy_from_user(&qos, optval, sizeof(qos))) + if (copy_from_sockptr(&qos, optval, sizeof(qos))) return -EFAULT; error = check_qos(&qos); if (error) @@ -774,7 +774,7 @@ int vcc_setsockopt(struct socket *sock, int level, int optname, return 0; } case SO_SETCLP: - if (get_user(value, (unsigned long __user *)optval)) + if (copy_from_sockptr(&value, optval, sizeof(value))) return -EFAULT; if (value) vcc->atm_options |= ATM_ATMOPT_CLP; diff --git a/net/atm/common.h b/net/atm/common.h index 5850649068bb..a1e56e8de698 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -21,7 +21,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen); + sockptr_t optval, unsigned int optlen); int vcc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); void vcc_process_recv_queue(struct atm_vcc *vcc); diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 02bd2a436bdf..53e7d3f39e26 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -63,7 +63,7 @@ static int pvc_connect(struct socket *sock, struct sockaddr *sockaddr, } static int pvc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int error; diff --git a/net/atm/svc.c b/net/atm/svc.c index ba144d035e3d..4a02bcaad279 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -451,7 +451,7 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) } static int svc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct atm_vcc *vcc = ATM_SD(sock); @@ -464,7 +464,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname, error = -EINVAL; goto out; } - if (copy_from_user(&vcc->sap, optval, optlen)) { + if (copy_from_sockptr(&vcc->sap, optval, optlen)) { error = -EFAULT; goto out; } @@ -475,7 +475,7 @@ static int svc_setsockopt(struct socket *sock, int level, int optname, error = -EINVAL; goto out; } - if (get_user(value, (int __user *)optval)) { + if (copy_from_sockptr(&value, optval, sizeof(int))) { error = -EFAULT; goto out; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd91cd34f25e..17bf31a89692 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -528,7 +528,7 @@ ax25_cb *ax25_create_cb(void) */ static int ax25_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; ax25_cb *ax25; @@ -543,7 +543,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (get_user(opt, (unsigned int __user *)optval)) + if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) return -EFAULT; lock_sock(sk); @@ -640,7 +640,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { + if (copy_from_sockptr(devname, optval, optlen)) { res = -EFAULT; break; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index caf38a8ea6a8..d5eff27d5b1e 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1842,7 +1842,7 @@ drop: } static int hci_sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int len) + sockptr_t optval, unsigned int len) { struct hci_ufilter uf = { .opcode = 0 }; struct sock *sk = sock->sk; @@ -1862,7 +1862,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case HCI_DATA_DIR: - if (get_user(opt, (int __user *)optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(opt))) { err = -EFAULT; break; } @@ -1874,7 +1874,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, break; case HCI_TIME_STAMP: - if (get_user(opt, (int __user *)optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(opt))) { err = -EFAULT; break; } @@ -1896,7 +1896,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, } len = min_t(unsigned int, len, sizeof(uf)); - if (copy_from_user(&uf, optval, len)) { + if (copy_from_sockptr(&uf, optval, len)) { err = -EFAULT; break; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a995d2c51fa7..a3d104123f38 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -703,7 +703,7 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) } static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -736,7 +736,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.txwin_size = chan->tx_win; len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_user((char *) &opts, optval, len)) { + if (copy_from_sockptr(&opts, optval, len)) { err = -EFAULT; break; } @@ -782,7 +782,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -859,7 +859,7 @@ static int l2cap_set_mode(struct l2cap_chan *chan, u8 mode) } static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; @@ -891,7 +891,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_user((char *) &sec, optval, len)) { + if (copy_from_sockptr(&sec, optval, len)) { err = -EFAULT; break; } @@ -939,7 +939,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -954,7 +954,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -990,7 +990,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_user((char *) &pwr, optval, len)) { + if (copy_from_sockptr(&pwr, optval, len)) { err = -EFAULT; break; } @@ -1002,7 +1002,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -1050,7 +1050,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u16 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u16))) { err = -EFAULT; break; } @@ -1081,7 +1081,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u8 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u8))) { err = -EFAULT; break; } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index df14eebe80da..dba4ea0e1b0d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -644,7 +644,8 @@ static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, return len; } -static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) +static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err = 0; @@ -656,7 +657,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u switch (optname) { case RFCOMM_LM: - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -685,7 +686,8 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u return err; } -static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct bt_security sec; @@ -713,7 +715,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c sec.level = BT_SECURITY_LOW; len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_user((char *) &sec, optval, len)) { + if (copy_from_sockptr(&sec, optval, len)) { err = -EFAULT; break; } @@ -732,7 +734,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c break; } - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c8c3d38cdc7b..37260baf7150 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -791,7 +791,7 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, } static int sco_sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int len, err = 0; @@ -810,7 +810,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -831,7 +831,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, voice.setting = sco_pi(sk)->setting; len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_user((char *)&voice, optval, len)) { + if (copy_from_sockptr(&voice, optval, len)) { err = -EFAULT; break; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index b94ecd931002..3ad0a1df6712 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -669,8 +669,8 @@ out_err: return sent ? : err; } -static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) +static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, + unsigned int ol) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -685,7 +685,7 @@ static int setsockopt(struct socket *sock, return -EINVAL; if (lvl != SOL_CAIF) goto bad_sol; - if (copy_from_user(&linksel, ov, sizeof(int))) + if (copy_from_sockptr(&linksel, ov, sizeof(int))) return -EINVAL; lock_sock(&(cf_sk->sk)); cf_sk->conn_req.link_selector = linksel; @@ -699,7 +699,7 @@ static int setsockopt(struct socket *sock, return -ENOPROTOOPT; lock_sock(&(cf_sk->sk)); if (ol > sizeof(cf_sk->conn_req.param.data) || - copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) { + copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); return -EINVAL; } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index f7587428febd..78ff9b3f1d40 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -627,14 +627,14 @@ static int j1939_sk_release(struct socket *sock) return 0; } -static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval, +static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, sockptr_t optval, unsigned int optlen, int flag) { int tmp; if (optlen != sizeof(tmp)) return -EINVAL; - if (copy_from_user(&tmp, optval, optlen)) + if (copy_from_sockptr(&tmp, optval, optlen)) return -EFAULT; lock_sock(&jsk->sk); if (tmp) @@ -646,7 +646,7 @@ static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval, } static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct j1939_sock *jsk = j1939_sk(sk); @@ -658,7 +658,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case SO_J1939_FILTER: - if (optval) { + if (!sockptr_is_null(optval)) { struct j1939_filter *f; int c; @@ -670,7 +670,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; count = optlen / sizeof(*filters); - filters = memdup_user(optval, optlen); + filters = memdup_sockptr(optval, optlen); if (IS_ERR(filters)) return PTR_ERR(filters); @@ -703,7 +703,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, case SO_J1939_SEND_PRIO: if (optlen != sizeof(tmp)) return -EINVAL; - if (copy_from_user(&tmp, optval, optlen)) + if (copy_from_sockptr(&tmp, optval, optlen)) return -EFAULT; if (tmp < 0 || tmp > 7) return -EDOM; diff --git a/net/can/raw.c b/net/can/raw.c index 59c039d73c6d..94a9405658dc 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -485,7 +485,7 @@ static int raw_getname(struct socket *sock, struct sockaddr *uaddr, } static int raw_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -511,11 +511,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (count > 1) { /* filter does not fit into dfilter => alloc space */ - filter = memdup_user(optval, optlen); + filter = memdup_sockptr(optval, optlen); if (IS_ERR(filter)) return PTR_ERR(filter); } else if (count == 1) { - if (copy_from_user(&sfilter, optval, sizeof(sfilter))) + if (copy_from_sockptr(&sfilter, optval, sizeof(sfilter))) return -EFAULT; } @@ -568,7 +568,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen != sizeof(err_mask)) return -EINVAL; - if (copy_from_user(&err_mask, optval, optlen)) + if (copy_from_sockptr(&err_mask, optval, optlen)) return -EFAULT; err_mask &= CAN_ERR_MASK; @@ -607,7 +607,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen != sizeof(ro->loopback)) return -EINVAL; - if (copy_from_user(&ro->loopback, optval, optlen)) + if (copy_from_sockptr(&ro->loopback, optval, optlen)) return -EFAULT; break; @@ -616,7 +616,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen != sizeof(ro->recv_own_msgs)) return -EINVAL; - if (copy_from_user(&ro->recv_own_msgs, optval, optlen)) + if (copy_from_sockptr(&ro->recv_own_msgs, optval, optlen)) return -EFAULT; break; @@ -625,7 +625,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen != sizeof(ro->fd_frames)) return -EINVAL; - if (copy_from_user(&ro->fd_frames, optval, optlen)) + if (copy_from_sockptr(&ro->fd_frames, optval, optlen)) return -EFAULT; break; @@ -634,7 +634,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (optlen != sizeof(ro->join_filters)) return -EINVAL; - if (copy_from_user(&ro->join_filters, optval, optlen)) + if (copy_from_sockptr(&ro->join_filters, optval, optlen)) return -EFAULT; break; diff --git a/net/core/sock.c b/net/core/sock.c index 1444d7d53ba2..2c5dd1397775 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3211,7 +3211,7 @@ EXPORT_SYMBOL(sock_common_recvmsg); * Set socket options on an inet socket. */ int sock_common_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 434eea91b767..9cc9d1ee6cdb 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -295,7 +295,7 @@ int dccp_disconnect(struct sock *sk, int flags); int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); + sockptr_t optval, unsigned int optlen); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9e453611107f..2e9e8449698f 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -411,7 +411,7 @@ out: EXPORT_SYMBOL_GPL(dccp_ioctl); static int dccp_setsockopt_service(struct sock *sk, const __be32 service, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; @@ -426,9 +426,9 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return -ENOMEM; sl->dccpsl_nr = optlen / sizeof(u32) - 1; - if (copy_from_user(sl->dccpsl_list, - optval + sizeof(service), - optlen - sizeof(service)) || + sockptr_advance(optval, sizeof(service)); + if (copy_from_sockptr(sl->dccpsl_list, optval, + optlen - sizeof(service)) || dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { kfree(sl); return -EFAULT; @@ -482,7 +482,7 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) } static int dccp_setsockopt_ccid(struct sock *sk, int type, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { u8 *val; int rc = 0; @@ -490,7 +490,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type, if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = memdup_user(optval, optlen); + val = memdup_sockptr(optval, optlen); if (IS_ERR(val)) return PTR_ERR(val); @@ -507,7 +507,7 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type, } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; @@ -529,7 +529,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, if (optlen < (int)sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; if (optname == DCCP_SOCKOPT_SERVICE) @@ -572,8 +572,8 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, return err; } -int dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7d51ab608fb3..3b53d766789d 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -150,7 +150,8 @@ static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; static atomic_long_t decnet_memory_allocated; -static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags); +static int __dn_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen, int flags); static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags); static struct hlist_head *dn_find_list(struct sock *sk) @@ -1320,7 +1321,8 @@ out: return err; } -static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +static int dn_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err; @@ -1332,14 +1334,14 @@ static int dn_setsockopt(struct socket *sock, int level, int optname, char __use /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != DSO_LINKINFO && optname != DSO_STREAM && optname != DSO_SEQPACKET) - err = nf_setsockopt(sk, PF_DECnet, optname, - USER_SOCKPTR(optval), optlen); + err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); #endif return err; } -static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen, int flags) +static int __dn_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen, int flags) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); @@ -1355,13 +1357,13 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us } u; int err; - if (optlen && !optval) + if (optlen && sockptr_is_null(optval)) return -EINVAL; if (optlen > sizeof(u)) return -EINVAL; - if (copy_from_user(&u, optval, optlen)) + if (copy_from_sockptr(&u, optval, optlen)) return -EFAULT; switch (optname) { diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 94ae9662133e..a45a0401adc5 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -382,7 +382,7 @@ static int raw_getsockopt(struct sock *sk, int level, int optname, } static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { return -EOPNOTSUPP; } @@ -872,7 +872,7 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname, } static int dgram_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct dgram_sock *ro = dgram_sk(sk); struct net *net = sock_net(sk); @@ -882,7 +882,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; lock_sock(sk); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f7f1507b89fe..8dc027e54c5b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1401,21 +1401,19 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) skb_dst_drop(skb); } -int ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, unsigned int optlen) +int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { int err; if (level != SOL_IP) return -ENOPROTOOPT; - err = do_ip_setsockopt(sk, level, optname, USER_SOCKPTR(optval), - optlen); + err = do_ip_setsockopt(sk, level, optname, optval, optlen); #if IS_ENABLED(CONFIG_BPFILTER_UMH) if (optname >= BPFILTER_IPT_SO_SET_REPLACE && optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, USER_SOCKPTR(optval), - optlen); + err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); #endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ @@ -1423,8 +1421,7 @@ int ip_setsockopt(struct sock *sk, int level, optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY && !ip_mroute_opt(optname)) - err = nf_setsockopt(sk, PF_INET, optname, USER_SOCKPTR(optval), - optlen); + err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); #endif return err; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2a57d633b31e..6fd4330287c2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -809,11 +809,11 @@ static int raw_sk_init(struct sock *sk) return 0; } -static int raw_seticmpfilter(struct sock *sk, char __user *optval, int optlen) +static int raw_seticmpfilter(struct sock *sk, sockptr_t optval, int optlen) { if (optlen > sizeof(struct icmp_filter)) optlen = sizeof(struct icmp_filter); - if (copy_from_user(&raw_sk(sk)->filter, optval, optlen)) + if (copy_from_sockptr(&raw_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; } @@ -838,7 +838,7 @@ out: return ret; } static int do_raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { if (optname == ICMP_FILTER) { if (inet_sk(sk)->inet_num != IPPROTO_ICMP) @@ -850,7 +850,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, } static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { if (level != SOL_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71cbc61c335f..27de9380ed14 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3323,7 +3323,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, return err; } -int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, +int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3331,8 +3331,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); - return do_tcp_setsockopt(sk, level, optname, USER_SOCKPTR(optval), - optlen); + return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c6cb2d09dbc7..5a6a2f6d86b9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2703,12 +2703,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL(udp_lib_setsockopt); -int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, - USER_SOCKPTR(optval), optlen, + optval, optlen, udp_push_pending_frames); return ip_setsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index ab313702c87f..2878d8285caf 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -12,8 +12,8 @@ int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); int udp_v4_get_port(struct sock *sk, unsigned short snum); void udp_v4_rehash(struct sock *sk); -int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index dcd000a5a9b1..d2282f5c9760 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -980,8 +980,8 @@ e_inval: return -EINVAL; } -int ipv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { int err; @@ -991,14 +991,12 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - err = do_ipv6_setsockopt(sk, level, optname, USER_SOCKPTR(optval), - optlen); + err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && optname != IPV6_XFRM_POLICY) - err = nf_setsockopt(sk, PF_INET6, optname, USER_SOCKPTR(optval), - optlen); + err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen); #endif return err; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 594e01ad670a..874f01cd7aec 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -972,13 +972,13 @@ do_confirm: } static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + sockptr_t optval, int optlen) { switch (optname) { case ICMPV6_FILTER: if (optlen > sizeof(struct icmp6_filter)) optlen = sizeof(struct icmp6_filter); - if (copy_from_user(&raw6_sk(sk)->filter, optval, optlen)) + if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; default: @@ -1015,12 +1015,12 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct raw6_sock *rp = raw6_sk(sk); int val; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { @@ -1062,7 +1062,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, } static int rawv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { switch (level) { case SOL_RAW: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2df1e6c9d7cb..15818e18655d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1618,12 +1618,12 @@ void udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, - USER_SOCKPTR(optval), optlen, + optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 30dfb6f1b762..b2fcc46c1630 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -17,8 +17,8 @@ void udp_v6_rehash(struct sock *sk); int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -int udpv6_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ee0add15497d..6ee9851ac7c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1494,7 +1494,7 @@ static int iucv_sock_release(struct socket *sock) /* getsockopt and setsockopt */ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); @@ -1507,7 +1507,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *) optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; rc = 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 56fac24a627a..56dad9565bc9 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1265,7 +1265,7 @@ static void kcm_recv_enable(struct kcm_sock *kcm) } static int kcm_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct kcm_sock *kcm = kcm_sk(sock->sk); int val, valbool; @@ -1277,8 +1277,8 @@ static int kcm_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(int))) + return -EFAULT; valbool = val ? 1 : 0; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index e58fe7e3b884..4389df66af35 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1242,7 +1242,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, * session or the special tunnel type. */ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct l2tp_session *session; @@ -1256,7 +1256,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; err = -ENOTCONN; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6140a3e46c26..7180979114e4 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1053,7 +1053,7 @@ static int llc_ui_ioctl(struct socket *sock, unsigned int cmd, * Set various connection specific parameters. */ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); @@ -1063,7 +1063,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; - rc = get_user(opt, (int __user *)optval); + rc = copy_from_sockptr(&opt, optval, sizeof(opt)); if (rc) goto out; rc = -EINVAL; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7246847efa90..2891ae8a1028 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1632,7 +1632,7 @@ static void mptcp_destroy(struct sock *sk) } static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; struct socket *ssock; @@ -1648,8 +1648,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return -EINVAL; } - ret = sock_setsockopt(ssock, SOL_SOCKET, optname, - USER_SOCKPTR(optval), optlen); + ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); if (ret == 0) { if (optname == SO_REUSEPORT) sk->sk_reuseport = ssock->sk->sk_reuseport; @@ -1660,12 +1659,11 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, return ret; } - return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, - USER_SOCKPTR(optval), optlen); + return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); } static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = (struct sock *)msk; int ret = -EOPNOTSUPP; @@ -1692,7 +1690,7 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, } static int mptcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); struct sock *ssk; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3cd58f0c2de4..d8921b833744 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1621,7 +1621,7 @@ static void netlink_update_socket_mc(struct netlink_sock *nlk, } static int netlink_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -1632,7 +1632,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; if (optlen >= sizeof(int) && - get_user(val, (unsigned int __user *)optval)) + copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index f90ef6934b8f..6d16e1ab1a8a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -294,7 +294,7 @@ void nr_destroy_socket(struct sock *sk) */ static int nr_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); @@ -306,7 +306,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (get_user(opt, (unsigned int __user *)optval)) + if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) return -EFAULT; switch (optname) { diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 6da1e2334bb6..d257ed3b732a 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -218,7 +218,7 @@ error: } static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -241,7 +241,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } @@ -263,7 +263,7 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, break; } - if (get_user(opt, (u32 __user *) optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d8d4f78f78e4..0b8160d1a6e0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1558,7 +1558,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, return 0; } -static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, +static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, unsigned int len) { struct bpf_prog *new; @@ -1568,7 +1568,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, return -EPERM; if (len != sizeof(fd)) return -EINVAL; - if (copy_from_user(&fd, data, len)) + if (copy_from_sockptr(&fd, data, len)) return -EFAULT; new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); @@ -1579,12 +1579,12 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, return 0; } -static int fanout_set_data(struct packet_sock *po, char __user *data, +static int fanout_set_data(struct packet_sock *po, sockptr_t data, unsigned int len) { switch (po->fanout->type) { case PACKET_FANOUT_CBPF: - return fanout_set_data_cbpf(po, USER_SOCKPTR(data), len); + return fanout_set_data_cbpf(po, data, len); case PACKET_FANOUT_EBPF: return fanout_set_data_ebpf(po, data, len); default: @@ -3652,7 +3652,8 @@ static void packet_flush_mclist(struct sock *sk) } static int -packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, + unsigned int optlen) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); @@ -3672,7 +3673,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EINVAL; if (len > sizeof(mreq)) len = sizeof(mreq); - if (copy_from_user(&mreq, optval, len)) + if (copy_from_sockptr(&mreq, optval, len)) return -EFAULT; if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) return -EINVAL; @@ -3703,7 +3704,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < len) { ret = -EINVAL; } else { - if (copy_from_user(&req_u.req, optval, len)) + if (copy_from_sockptr(&req_u.req, optval, len)) ret = -EFAULT; else ret = packet_set_ring(sk, &req_u, 0, @@ -3718,7 +3719,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; pkt_sk(sk)->copy_thresh = val; @@ -3730,7 +3731,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: @@ -3756,7 +3757,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; @@ -3776,7 +3777,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3795,7 +3796,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3809,7 +3810,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3825,7 +3826,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EINVAL; if (optlen < sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3844,7 +3845,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; po->tp_tstamp = val; @@ -3856,7 +3857,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; return fanout_add(sk, val & 0xffff, val >> 16); @@ -3874,7 +3875,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; if (val < 0 || val > 1) return -EINVAL; @@ -3888,7 +3889,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; lock_sock(sk); @@ -3907,7 +3908,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; po->xmit = val ? packet_direct_xmit : dev_queue_xmit; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4577e43cb777..e47d09aca4af 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -975,7 +975,7 @@ static int pep_init(struct sock *sk) } static int pep_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct pep_sock *pn = pep_sk(sk); int val = 0, err = 0; @@ -983,7 +983,7 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_PNPIPE) return -ENOPROTOOPT; if (optlen >= sizeof(int)) { - if (get_user(val, (int __user *) optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; } diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 1a5bf3fa4578..b239120dd9ca 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -290,8 +290,7 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return 0; } -static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, - int len) +static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len) { struct sockaddr_in6 sin6; struct sockaddr_in sin; @@ -308,14 +307,15 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, goto out; } else if (len < sizeof(struct sockaddr_in6)) { /* Assume IPv4 */ - if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { + if (copy_from_sockptr(&sin, optval, + sizeof(struct sockaddr_in))) { ret = -EFAULT; goto out; } ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); sin6.sin6_port = sin.sin_port; } else { - if (copy_from_user(&sin6, optval, + if (copy_from_sockptr(&sin6, optval, sizeof(struct sockaddr_in6))) { ret = -EFAULT; goto out; @@ -327,21 +327,20 @@ out: return ret; } -static int rds_set_bool_option(unsigned char *optvar, char __user *optval, +static int rds_set_bool_option(unsigned char *optvar, sockptr_t optval, int optlen) { int value; if (optlen < sizeof(int)) return -EINVAL; - if (get_user(value, (int __user *) optval)) + if (copy_from_sockptr(&value, optval, sizeof(int))) return -EFAULT; *optvar = !!value; return 0; } -static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, - int optlen) +static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen) { int ret; @@ -358,8 +357,7 @@ static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, return ret; } -static int rds_set_transport(struct rds_sock *rs, char __user *optval, - int optlen) +static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) { int t_type; @@ -369,7 +367,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, if (optlen != sizeof(int)) return -EINVAL; - if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type))) + if (copy_from_sockptr(&t_type, optval, sizeof(t_type))) return -EFAULT; if (t_type < 0 || t_type >= RDS_TRANS_COUNT) @@ -380,7 +378,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, return rs->rs_transport ? 0 : -ENOPROTOOPT; } -static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, +static int rds_enable_recvtstamp(struct sock *sk, sockptr_t optval, int optlen, int optname) { int val, valbool; @@ -388,7 +386,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, if (optlen != sizeof(int)) return -EFAULT; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; valbool = val ? 1 : 0; @@ -404,7 +402,7 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, return 0; } -static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, +static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_rx_trace_so trace; @@ -413,7 +411,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, if (optlen != sizeof(struct rds_rx_trace_so)) return -EFAULT; - if (copy_from_user(&trace, optval, sizeof(trace))) + if (copy_from_sockptr(&trace, optval, sizeof(trace))) return -EFAULT; if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX) @@ -432,7 +430,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, } static int rds_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); int ret; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index a7ae11846cd7..ccdd304eae0a 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -353,21 +353,20 @@ out: return ret; } -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_get_mr_args args; if (optlen != sizeof(struct rds_get_mr_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval, - sizeof(struct rds_get_mr_args))) + if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_args))) return -EFAULT; return __rds_rdma_map(rs, &args, NULL, NULL, NULL); } -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_get_mr_for_dest_args args; struct rds_get_mr_args new_args; @@ -375,7 +374,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) if (optlen != sizeof(struct rds_get_mr_for_dest_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval, + if (copy_from_sockptr(&args, optval, sizeof(struct rds_get_mr_for_dest_args))) return -EFAULT; @@ -394,7 +393,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) /* * Free the MR indicated by the given R_Key */ -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_free_mr_args args; struct rds_mr *mr; @@ -403,8 +402,7 @@ int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) if (optlen != sizeof(struct rds_free_mr_args)) return -EINVAL; - if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval, - sizeof(struct rds_free_mr_args))) + if (copy_from_sockptr(&args, optval, sizeof(struct rds_free_mr_args))) return -EFAULT; /* Special case - a null cookie means flush all unused MRs */ diff --git a/net/rds/rds.h b/net/rds/rds.h index 106e862996b9..d35d1fc39807 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -924,9 +924,9 @@ int rds_send_pong(struct rds_conn_path *cp, __be16 dport); /* rdma.c */ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); -int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); -int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); -int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); +int rds_get_mr(struct rds_sock *rs, sockptr_t optval, int optlen); +int rds_get_mr_for_dest(struct rds_sock *rs, sockptr_t optval, int optlen); +int rds_free_mr(struct rds_sock *rs, sockptr_t optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); int rds_rdma_extra_size(struct rds_rdma_args *args, struct rds_iov_vector *iov); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ce85656ac9c1..cf7d974e0f61 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -365,7 +365,7 @@ void rose_destroy_socket(struct sock *sk) */ static int rose_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); @@ -377,7 +377,7 @@ static int rose_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(opt, (int __user *)optval)) + if (copy_from_sockptr(&opt, optval, sizeof(int))) return -EFAULT; switch (optname) { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index cd7d0d204c74..e6725a6de015 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -588,7 +588,7 @@ EXPORT_SYMBOL(rxrpc_sock_set_min_security_level); * set RxRPC socket options */ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); unsigned int min_sec_level; @@ -639,8 +639,8 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, ret = -EISCONN; if (rx->sk.sk_state != RXRPC_UNBOUND) goto error; - ret = get_user(min_sec_level, - (unsigned int __user *) optval); + ret = copy_from_sockptr(&min_sec_level, optval, + sizeof(unsigned int)); if (ret < 0) goto error; ret = -EINVAL; @@ -658,7 +658,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, if (rx->sk.sk_state != RXRPC_SERVER_BOUND2) goto error; ret = -EFAULT; - if (copy_from_user(service_upgrade, optval, + if (copy_from_sockptr(service_upgrade, optval, sizeof(service_upgrade)) != 0) goto error; ret = -EINVAL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9a2139ebd67d..6d29a3603a3e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -909,8 +909,8 @@ extern const struct rxrpc_security rxrpc_no_security; extern struct key_type key_type_rxrpc; extern struct key_type key_type_rxrpc_s; -int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); -int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); +int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int); +int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, u32); diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 0c98313dd7a8..94c3df392651 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -896,7 +896,7 @@ static void rxrpc_describe(const struct key *key, struct seq_file *m) /* * grab the security key for a socket */ -int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) +int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) { struct key *key; char *description; @@ -906,7 +906,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) if (optlen <= 0 || optlen > PAGE_SIZE - 1) return -EINVAL; - description = memdup_user_nul(optval, optlen); + description = memdup_sockptr_nul(optval, optlen); if (IS_ERR(description)) return PTR_ERR(description); @@ -926,8 +926,7 @@ int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen) /* * grab the security keyring for a server socket */ -int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, - int optlen) +int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) { struct key *key; char *description; @@ -937,7 +936,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, if (optlen <= 0 || optlen > PAGE_SIZE - 1) return -EINVAL; - description = memdup_user_nul(optval, optlen); + description = memdup_sockptr_nul(optval, optlen); if (IS_ERR(description)) return PTR_ERR(description); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9a767f359718..144808dfea9e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4429,7 +4429,7 @@ out: * optlen - the size of the buffer. */ static int sctp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { void *kopt = NULL; int retval = 0; @@ -4449,7 +4449,7 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, } if (optlen > 0) { - kopt = memdup_user(optval, optlen); + kopt = memdup_sockptr(optval, optlen); if (IS_ERR(kopt)) return PTR_ERR(kopt); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9711c9e0e515..4ac1d4de6676 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1731,7 +1731,7 @@ out: } static int smc_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct smc_sock *smc; @@ -1754,7 +1754,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) + if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; lock_sock(sk); diff --git a/net/socket.c b/net/socket.c index c97f83d879ae..e44b8ac47f6f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2094,10 +2094,10 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) * Set a socket option. Because we don't know the option lengths we have * to pass the user mode parameter for the protocols to sort out. */ -int __sys_setsockopt(int fd, int level, int optname, char __user *optval, +int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - mm_segment_t oldfs = get_fs(); + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2115,7 +2115,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval, if (!in_compat_syscall()) err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, - optval, &optlen, + user_optval, &optlen, &kernel_optval); if (err < 0) goto out_put; @@ -2124,25 +2124,16 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *optval, goto out_put; } - if (kernel_optval) { - set_fs(KERNEL_DS); - optval = (char __user __force *)kernel_optval; - } - + if (kernel_optval) + optval = KERNEL_SOCKPTR(kernel_optval); if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) - err = sock_setsockopt(sock, level, optname, - USER_SOCKPTR(optval), optlen); + err = sock_setsockopt(sock, level, optname, optval, optlen); else if (unlikely(!sock->ops->setsockopt)) err = -EOPNOTSUPP; else err = sock->ops->setsockopt(sock, level, optname, optval, optlen); - - if (kernel_optval) { - set_fs(oldfs); - kfree(kernel_optval); - } - + kfree(kernel_optval); out_put: fput_light(sock->file, fput_needed); return err; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index fc388cef6471..07419f36116a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3103,7 +3103,7 @@ static int tipc_sk_leave(struct tipc_sock *tsk) * Returns 0 on success, errno otherwise */ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, - char __user *ov, unsigned int ol) + sockptr_t ov, unsigned int ol) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); @@ -3124,17 +3124,17 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_NODELAY: if (ol < sizeof(value)) return -EINVAL; - if (get_user(value, (u32 __user *)ov)) + if (copy_from_sockptr(&value, ov, sizeof(u32))) return -EFAULT; break; case TIPC_GROUP_JOIN: if (ol < sizeof(mreq)) return -EINVAL; - if (copy_from_user(&mreq, ov, sizeof(mreq))) + if (copy_from_sockptr(&mreq, ov, sizeof(mreq))) return -EFAULT; break; default: - if (ov || ol) + if (!sockptr_is_null(ov) || ol) return -EINVAL; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index ec10041c6b7d..d77f7d821130 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -450,7 +450,7 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } -static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, +static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { struct tls_crypto_info *crypto_info; @@ -460,7 +460,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, int rc = 0; int conf; - if (!optval || (optlen < sizeof(*crypto_info))) { + if (sockptr_is_null(optval) || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; goto out; } @@ -479,7 +479,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, goto out; } - rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); + rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; @@ -522,8 +522,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, goto err_crypto_info; } - rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), - optlen - sizeof(*crypto_info)); + sockptr_advance(optval, sizeof(*crypto_info)); + rc = copy_from_sockptr(crypto_info + 1, optval, + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; @@ -579,8 +580,8 @@ out: return rc; } -static int do_tls_setsockopt(struct sock *sk, int optname, - char __user *optval, unsigned int optlen) +static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, + unsigned int optlen) { int rc = 0; @@ -600,7 +601,7 @@ static int do_tls_setsockopt(struct sock *sk, int optname, } static int tls_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct tls_context *ctx = tls_get_ctx(sk); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index df204c6761c4..27bbcfad9c17 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1517,7 +1517,7 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk, static int vsock_stream_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, + sockptr_t optval, unsigned int optlen) { int err; @@ -1535,7 +1535,7 @@ static int vsock_stream_setsockopt(struct socket *sock, err = -EINVAL; \ goto exit; \ } \ - if (copy_from_user(&_v, optval, sizeof(_v)) != 0) { \ + if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) { \ err = -EFAULT; \ goto exit; \ } \ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d5b09bbff375..0bbb283f23c9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -431,7 +431,7 @@ void x25_destroy_socket_from_timer(struct sock *sk) */ static int x25_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { int opt; struct sock *sk = sock->sk; @@ -445,7 +445,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, goto out; rc = -EFAULT; - if (get_user(opt, (int __user *)optval)) + if (copy_from_sockptr(&opt, optval, sizeof(int))) goto out; if (opt) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 26e3bba8c204..2e94a7e94671 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -702,7 +702,7 @@ struct xdp_umem_reg_v1 { }; static int xsk_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) + sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); @@ -720,7 +720,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(entries)) return -EINVAL; - if (copy_from_user(&entries, optval, sizeof(entries))) + if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; mutex_lock(&xs->mutex); @@ -747,7 +747,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, else if (optlen < sizeof(mr)) mr_size = sizeof(struct xdp_umem_reg_v1); - if (copy_from_user(&mr, optval, mr_size)) + if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; mutex_lock(&xs->mutex); @@ -774,7 +774,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, struct xsk_queue **q; int entries; - if (copy_from_user(&entries, optval, sizeof(entries))) + if (copy_from_sockptr(&entries, optval, sizeof(entries))) return -EFAULT; mutex_lock(&xs->mutex); -- cgit From 6d04fe15f78acdf8e32329e208552e226f7a8ae6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Jul 2020 08:09:08 +0200 Subject: net: optimize the sockptr_t for unified kernel/user address spaces For architectures like x86 and arm64 we don't need the separate bit to indicate that a pointer is a kernel pointer as the address spaces are unified. That way the sockptr_t can be reduced to a union of two pointers, which leads to nicer calling conventions. The only caveat is that we need to check that users don't pass in kernel address and thus gain access to kernel memory. Thus the USER_SOCKPTR helper is replaced with a init_user_sockptr function that does this check and returns an error if it fails. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/bpfilter/sockopt.c | 14 ++++++++------ net/socket.c | 6 +++++- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 1b34cb9a7708..94f18d2352d0 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,16 +57,18 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, + char __user *user_optval, int __user *optlen) { - int len; + sockptr_t optval; + int err, len; if (get_user(len, optlen)) return -EFAULT; - - return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, - false); + err = init_user_sockptr(&optval, user_optval); + if (err) + return err; + return bpfilter_mbox_request(sk, optname, optval, len, false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/socket.c b/net/socket.c index e44b8ac47f6f..94ca4547cd7c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2097,7 +2097,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval = USER_SOCKPTR(user_optval); + sockptr_t optval; char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2105,6 +2105,10 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; + err = init_user_sockptr(&optval, user_optval); + if (err) + return err; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; -- cgit From dfd3d5266dc1d9a2b06e5a09bbff4cee547eeb5f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Jul 2020 08:48:55 +0200 Subject: sctp: fix slab-out-of-bounds in SCTP_DELAYED_SACK processing This sockopt accepts two kinds of parameters, using struct sctp_sack_info and struct sctp_assoc_value. The mentioned commit didn't notice an implicit cast from the smaller (latter) struct to the bigger one (former) when copying the data from the user space, which now leads to an attempt to write beyond the buffer (because it assumes the storing buffer is bigger than the parameter itself). Fix it by allocating a sctp_sack_info on stack and filling it out based on the small struct for the compat case. Changelog stole from an earlier patch from Marcelo Ricardo Leitner. Fixes: ebb25defdc17 ("sctp: pass a kernel pointer to sctp_setsockopt_delayed_ack") Reported-by: syzbot+0e4699d000d8b874d8dc@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 144808dfea9e..ec1fba1fbe71 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2749,31 +2749,12 @@ static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, * timer to expire. The default value for this is 2, setting this * value to 1 will disable the delayed sack algorithm. */ - -static int sctp_setsockopt_delayed_ack(struct sock *sk, - struct sctp_sack_info *params, - unsigned int optlen) +static int __sctp_setsockopt_delayed_ack(struct sock *sk, + struct sctp_sack_info *params) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - if (optlen == sizeof(struct sctp_sack_info)) { - if (params->sack_delay == 0 && params->sack_freq == 0) - return 0; - } else if (optlen == sizeof(struct sctp_assoc_value)) { - pr_warn_ratelimited(DEPRECATED - "%s (pid %d) " - "Use of struct sctp_assoc_value in delayed_ack socket option.\n" - "Use struct sctp_sack_info instead\n", - current->comm, task_pid_nr(current)); - - if (params->sack_delay == 0) - params->sack_freq = 1; - else - params->sack_freq = 0; - } else - return -EINVAL; - /* Validate value parameter. */ if (params->sack_delay > 500) return -EINVAL; @@ -2821,6 +2802,33 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, return 0; } +static int sctp_setsockopt_delayed_ack(struct sock *sk, + struct sctp_sack_info *params, + unsigned int optlen) +{ + if (optlen == sizeof(struct sctp_assoc_value)) { + struct sctp_assoc_value *v = (struct sctp_assoc_value *)params; + struct sctp_sack_info p; + + pr_warn_ratelimited(DEPRECATED + "%s (pid %d) " + "Use of struct sctp_assoc_value in delayed_ack socket option.\n" + "Use struct sctp_sack_info instead\n", + current->comm, task_pid_nr(current)); + + p.sack_assoc_id = v->assoc_id; + p.sack_delay = v->assoc_value; + p.sack_freq = v->assoc_value ? 0 : 1; + return __sctp_setsockopt_delayed_ack(sk, &p); + } + + if (optlen != sizeof(struct sctp_sack_info)) + return -EINVAL; + if (params->sack_delay == 0 && params->sack_freq == 0) + return 0; + return __sctp_setsockopt_delayed_ack(sk, params); +} + /* 7.1.3 Initialization Parameters (SCTP_INITMSG) * * Applications can specify protocol parameters for the default association -- cgit From a65878d6f00bb2d791348ae90dcc2ede1dbe2b91 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 24 Jul 2020 10:20:59 +0200 Subject: net: openvswitch: fixes potential deadlock in dp cleanup code The previous patch introduced a deadlock, this patch fixes it by making sure the work is canceled without holding the global ovs lock. This is done by moving the reorder processing one layer up to the netns level. Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage") Reported-by: syzbot+2c4ff3614695f75ce26c@syzkaller.appspotmail.com Reported-by: syzbot+bad6507e5db05017b008@syzkaller.appspotmail.com Reviewed-by: Paolo Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 23 ++++++++++++----------- net/openvswitch/datapath.h | 4 +--- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 95805f0e27bd..6b6822f82f70 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1655,7 +1655,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); - INIT_DELAYED_WORK(&dp->masks_rebalance, ovs_dp_masks_rebalance); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); @@ -1715,9 +1714,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); - schedule_delayed_work(&dp->masks_rebalance, - msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); - ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); @@ -1762,9 +1758,6 @@ static void __dp_destroy(struct datapath *dp) /* RCU destroy the flow table */ call_rcu(&dp->rcu, destroy_dp_rcu); - - /* Cancel remaining work. */ - cancel_delayed_work_sync(&dp->masks_rebalance); } static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -2349,14 +2342,18 @@ out: static void ovs_dp_masks_rebalance(struct work_struct *work) { - struct datapath *dp = container_of(work, struct datapath, - masks_rebalance.work); + struct ovs_net *ovs_net = container_of(work, struct ovs_net, + masks_rebalance.work); + struct datapath *dp; ovs_lock(); - ovs_flow_masks_rebalance(&dp->table); + + list_for_each_entry(dp, &ovs_net->dps, list_node) + ovs_flow_masks_rebalance(&dp->table); + ovs_unlock(); - schedule_delayed_work(&dp->masks_rebalance, + schedule_delayed_work(&ovs_net->masks_rebalance, msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); } @@ -2454,6 +2451,9 @@ static int __net_init ovs_init_net(struct net *net) INIT_LIST_HEAD(&ovs_net->dps); INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq); + INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance); + schedule_delayed_work(&ovs_net->masks_rebalance, + msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL)); return ovs_ct_init(net); } @@ -2508,6 +2508,7 @@ static void __net_exit ovs_exit_net(struct net *dnet) ovs_unlock(); + cancel_delayed_work_sync(&ovs_net->masks_rebalance); cancel_work_sync(&ovs_net->dp_notify_work); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 697a2354194b..24fcec22fde2 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -84,9 +84,6 @@ struct datapath { /* Switch meters. */ struct dp_meter_table meter_tbl; - - /* re-balance flow masks timer */ - struct delayed_work masks_rebalance; }; /** @@ -135,6 +132,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct delayed_work masks_rebalance; #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif -- cgit From 623b57bec7c8daf89623c95cd9cc919e4090c9da Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Jul 2020 14:09:19 +0100 Subject: sctp: remove redundant initialization of variable status The variable status is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Also put the variable declarations into reverse christmas tree order. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/protocol.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7ecaf7d575c0..d19db22262fd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1367,15 +1367,15 @@ static struct pernet_operations sctp_ctrlsock_ops = { /* Initialize the universe into something sensible. */ static __init int sctp_init(void) { - int i; - int status = -EINVAL; - unsigned long goal; - unsigned long limit; unsigned long nr_pages = totalram_pages(); + unsigned long limit; + unsigned long goal; + int max_entry_order; + int num_entries; int max_share; + int status; int order; - int num_entries; - int max_entry_order; + int i; sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); -- cgit From c4e9e09f5589f9afe6b8f8c4fb078e0559bca667 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:08 -0400 Subject: icmp: revise rfc4884 tests 1) Only accept packets with original datagram len field >= header len. The extension header must start after the original datagram headers. The embedded datagram len field is compared against the 128B minimum stipulated by RFC 4884. It is unlikely that headers extend beyond this. But as we know the exact header length, check explicitly. 2) Remove the check that datagram length must be <= 576B. This is a send constraint. There is no value in testing this on rx. Within private networks it may be known safe to send larger packets. Process these packets. This test was also too lax. It compared original datagram length rather than entire icmp packet length. The stand-alone fix would be: - if (hlen + skb->len > 576) + if (-skb_network_offset(skb) + skb->len > 576) Fixes: eba75c587e81 ("icmp: support rfc 4884") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 793aebf07c2a..8d2654cdbd77 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1164,16 +1164,12 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, return; } - /* outer headers up to inner iph. skb->data is at inner payload */ + /* original datagram headers: end of icmph to payload (skb->data) */ hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr); - /* per rfc 791: maximum packet length of 576 bytes */ - if (hlen + skb->len > 576) - return; - /* per rfc 4884: minimal datagram length of 128 bytes */ off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32); - if (off < 128) + if (off < 128 || off < hlen) return; /* kernel has stripped headers: return payload offset in bytes */ -- cgit From 178c49d9f9a4b5ade00c93480d714708fe971e24 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:09 -0400 Subject: icmp: prepare rfc 4884 for ipv6 The RFC 4884 spec is largely the same between IPv4 and IPv6. Factor out the IPv4 specific parts in preparation for IPv6 support: - icmp types supported - icmp header size, and thus offset to original datagram start - datagram length field offset in icmp(6)hdr. - datagram length field word size: 4B for IPv4, 8B for IPv6. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 17 ++++------------- net/ipv4/ip_sockglue.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8d2654cdbd77..7498c58460a1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1151,24 +1151,15 @@ static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off) } void ip_icmp_error_rfc4884(const struct sk_buff *skb, - struct sock_ee_data_rfc4884 *out) + struct sock_ee_data_rfc4884 *out, + int thlen, int off) { - int hlen, off; - - switch (icmp_hdr(skb)->type) { - case ICMP_DEST_UNREACH: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return; - } + int hlen; /* original datagram headers: end of icmph to payload (skb->data) */ - hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr); + hlen = -skb_transport_offset(skb) - thlen; /* per rfc 4884: minimal datagram length of 128 bytes */ - off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32); if (off < 128 || off < hlen) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8dc027e54c5b..d2c223554ff7 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -390,6 +390,18 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } +static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb, + struct sock_ee_data_rfc4884 *out) +{ + switch (icmp_hdr(skb)->type) { + case ICMP_DEST_UNREACH: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr), + icmp_hdr(skb)->un.reserved[1] * 4); + } +} + void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { @@ -413,7 +425,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, if (skb_pull(skb, payload - skb->data)) { if (inet_sk(sk)->recverr_rfc4884) - ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); + ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb) == 0) -- cgit From 01370434df85eb76ecb1527a4466013c4aca2436 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jul 2020 09:03:10 -0400 Subject: icmp6: support rfc 4884 Extend the rfc 4884 read interface introduced for ipv4 in commit eba75c587e81 ("icmp: support rfc 4884") to ipv6. Add socket option SOL_IPV6/IPV6_RECVERR_RFC4884. Changes v1->v2: - make ipv6_icmp_error_rfc4884 static (file scope) Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 1 + net/ipv6/datagram.c | 16 ++++++++++++++++ net/ipv6/ipv6_sockglue.c | 12 ++++++++++++ 3 files changed, 29 insertions(+) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7498c58460a1..cf36f955bfe6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1173,6 +1173,7 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, if (!ip_icmp_error_rfc4884_validate(skb, off)) out->flags |= SO_EE_RFC4884_FLAG_INVALID; } +EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884); int icmp_err(struct sk_buff *skb, u32 info) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 390bedde21a5..cc8ad7ddecda 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -284,6 +285,17 @@ int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, } EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only); +static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, + struct sock_ee_data_rfc4884 *out) +{ + switch (icmp6_hdr(skb)->icmp6_type) { + case ICMPV6_TIME_EXCEED: + case ICMPV6_DEST_UNREACH: + ip_icmp_error_rfc4884(skb, out, sizeof(struct icmp6hdr), + icmp6_hdr(skb)->icmp6_datagram_len * 8); + } +} + void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { @@ -313,6 +325,10 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, serr->port = port; __skb_pull(skb, payload - skb->data); + + if (inet6_sk(sk)->recverr_rfc4884) + ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); + skb_reset_transport_header(skb); if (sock_queue_err_skb(sk, skb)) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d2282f5c9760..20c740976334 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -965,6 +965,14 @@ done: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; + case IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + goto e_inval; + if (val < 0 || val > 1) + goto e_inval; + np->recverr_rfc4884 = valbool; + retv = 0; + break; } release_sock(sk); @@ -1439,6 +1447,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rtalert_isolate; break; + case IPV6_RECVERR_RFC4884: + val = np->recverr_rfc4884; + break; + default: return -ENOPROTOOPT; } -- cgit From 95075150d0bdaa78cc350efc606e6ed02fa2a991 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:49 +0100 Subject: l2tp: avoid multiple assignments checkpatch warns about multiple assignments. Update l2tp accordingly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 6 +++--- net/l2tp/l2tp_ip.c | 12 ++++++++---- net/l2tp/l2tp_ip6.c | 6 ++++-- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7e3523015d6f..b871cceeff7c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -621,8 +621,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int length) { struct l2tp_tunnel *tunnel = session->tunnel; + u32 ns = 0, nr = 0; int offset; - u32 ns, nr; /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { @@ -644,7 +644,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * the control of the LNS. If no sequence numbers present but * we were expecting them, discard frame. */ - ns = nr = 0; L2TP_SKB_CB(skb)->has_seq = 0; if (tunnel->version == L2TP_HDR_VER_2) { if (hdrflags & L2TP_HDRFLAG_S) { @@ -826,7 +825,8 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; /* Get L2TP header flags */ hdrflags = ntohs(*(__be16 *)ptr); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d81564cf1e7f..a159cb2bf0f4 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -124,7 +124,8 @@ static int l2tp_ip_recv(struct sk_buff *skb) goto discard; /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; session_id = ntohl(*((__be32 *)ptr)); ptr += 4; @@ -153,7 +154,8 @@ static int l2tp_ip_recv(struct sk_buff *skb) goto discard_sess; /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); @@ -284,8 +286,10 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - if (addr->l2tp_addr.s_addr) - inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr; + if (addr->l2tp_addr.s_addr) { + inet->inet_rcv_saddr = addr->l2tp_addr.s_addr; + inet->inet_saddr = addr->l2tp_addr.s_addr; + } if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->inet_saddr = 0; /* Use device */ diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 614febf8dd0d..bc757bc7e264 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -137,7 +137,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) goto discard; /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; session_id = ntohl(*((__be32 *)ptr)); ptr += 4; @@ -166,7 +167,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) goto discard_sess; /* Point to L2TP header */ - optr = ptr = skb->data; + optr = skb->data; + ptr = skb->data; ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); -- cgit From 7a379558c28c435681221aa5d84ead8ff19211be Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:50 +0100 Subject: l2tp: WARN_ON rather than BUG_ON in l2tp_dfs_seq_start l2tp_dfs_seq_start had a BUG_ON to catch a possible programming error in l2tp_dfs_seq_open. Since we can easily bail out of l2tp_dfs_seq_start, prefer to do that and flag the error with a WARN_ON rather than crashing the kernel. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 72ba83aa0eaf..96cb9601c21b 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -72,7 +72,10 @@ static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs) if (!pos) goto out; - BUG_ON(!m->private); + if (WARN_ON(!m->private)) { + pd = NULL; + goto out; + } pd = m->private; if (!pd->tunnel) -- cgit From ce2f86ae253de3f3e961d4f91438604a97c29752 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:51 +0100 Subject: l2tp: remove BUG_ON in l2tp_session_queue_purge l2tp_session_queue_purge is only called from l2tp_core.c, and it's easy to statically analyse the code paths calling it to validate that it should never be passed a NULL session pointer. Having a BUG_ON checking the session pointer triggers a checkpatch warning. Since the BUG_ON is of no value, remove it to avoid the warning. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b871cceeff7c..a1ed8baa5aaa 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -777,7 +777,6 @@ static int l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; - BUG_ON(!session); BUG_ON(session->magic != L2TP_SESSION_MAGIC); while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); -- cgit From cd3e29b333cc2571a5875d1b45c460dc948a2f95 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:52 +0100 Subject: l2tp: remove BUG_ON in l2tp_tunnel_closeall l2tp_tunnel_closeall is only called from l2tp_core.c, and it's easy to statically analyse the code path calling it to validate that it should never be passed a NULL tunnel pointer. Having a BUG_ON checking the tunnel pointer triggers a checkpatch warning. Since the BUG_ON is of no value, remove it to avoid the warning. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index a1ed8baa5aaa..6be3f2e69efd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1188,8 +1188,6 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) struct hlist_node *tmp; struct l2tp_session *session; - BUG_ON(!tunnel); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing all sessions...\n", tunnel->name); -- cgit From 1aa646ac71feb987a9571e1d70980d10ff495fbc Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:53 +0100 Subject: l2tp: don't BUG_ON session magic checks in l2tp_ppp checkpatch advises that WARN_ON and recovery code are preferred over BUG_ON which crashes the kernel. l2tp_ppp.c's BUG_ON checks of the l2tp session structure's "magic" field occur in code paths where it's reasonably easy to recover: * In the case of pppol2tp_sock_to_session, we can return NULL and the caller will bail out appropriately. There is no change required to any of the callsites of this function since they already handle pppol2tp_sock_to_session returning NULL. * In the case of pppol2tp_session_destruct we can just avoid decrementing the reference count on the suspect session structure. In the worst case scenario this results in a memory leak, which is preferable to a crash. Convert these uses of BUG_ON to WARN_ON accordingly. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 4389df66af35..2aeee648c080 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -163,8 +163,11 @@ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) sock_put(sk); goto out; } - - BUG_ON(session->magic != L2TP_SESSION_MAGIC); + if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) { + session = NULL; + sock_put(sk); + goto out; + } out: return session; @@ -419,7 +422,8 @@ static void pppol2tp_session_destruct(struct sock *sk) if (session) { sk->sk_user_data = NULL; - BUG_ON(session->magic != L2TP_SESSION_MAGIC); + if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) + return; l2tp_session_dec_refcount(session); } } -- cgit From ebb4f5e6e4cd68a856aeea5d319a76f47543c3a9 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:54 +0100 Subject: l2tp: don't BUG_ON seqfile checks in l2tp_ppp checkpatch advises that WARN_ON and recovery code are preferred over BUG_ON which crashes the kernel. l2tp_ppp has a BUG_ON check of struct seq_file's private pointer in pppol2tp_seq_start prior to accessing data through that pointer. Rather than crashing, we can simply bail out early and return NULL in order to terminate the seq file processing in much the same way as we do when reaching the end of tunnel/session instances to render. Retain a WARN_ON to help trace possible bugs in this area. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 2aeee648c080..13c3153b40d6 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1478,7 +1478,11 @@ static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs) if (!pos) goto out; - BUG_ON(!m->private); + if (WARN_ON(!m->private)) { + pd = NULL; + goto out; + } + pd = m->private; net = seq_file_net(m); -- cgit From 493048f5dfcd0093880414928717f68613ba9157 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:55 +0100 Subject: l2tp: WARN_ON rather than BUG_ON in l2tp_session_queue_purge l2tp_session_queue_purge is used during session shutdown to drop any skbs queued for reordering purposes according to L2TP dataplane rules. The BUG_ON in this function checks the session magic feather in an attempt to catch lifetime bugs. Rather than crashing the kernel with a BUG_ON, we can simply WARN_ON and refuse to do anything more -- in the worst case this could result in a leak. However this is highly unlikely given that the session purge only occurs from codepaths which have obtained the session by means of a lookup via. the parent tunnel and which check the session "dead" flag to protect against shutdown races. While we're here, have l2tp_session_queue_purge return void rather than an integer, since neither of the callsites checked the return value. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6be3f2e69efd..e228480fa529 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -773,16 +773,17 @@ EXPORT_SYMBOL(l2tp_recv_common); /* Drop skbs from the session's reorder_q */ -static int l2tp_session_queue_purge(struct l2tp_session *session) +static void l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; - BUG_ON(session->magic != L2TP_SESSION_MAGIC); + if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) + return; + while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } - return 0; } /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame -- cgit From 0dd62f69d898de3cf31a2f8b59e9a62bb5448457 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:56 +0100 Subject: l2tp: remove BUG_ON refcount value in l2tp_session_free l2tp_session_free is only called by l2tp_session_dec_refcount when the reference count reaches zero, so it's of limited value to validate the reference count value in l2tp_session_free itself. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e228480fa529..50548c61b91e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1563,8 +1563,6 @@ void l2tp_session_free(struct l2tp_session *session) { struct l2tp_tunnel *tunnel = session->tunnel; - BUG_ON(refcount_read(&session->ref_count) != 0); - if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); l2tp_tunnel_dec_refcount(tunnel); -- cgit From ab6934e084e5eee665adf6e834e5096ebae4a95f Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Fri, 24 Jul 2020 16:31:57 +0100 Subject: l2tp: WARN_ON rather than BUG_ON in l2tp_session_free l2tp_session_free called BUG_ON if the tunnel magic feather value wasn't correct. The intent of this was to catch lifetime bugs; for example early tunnel free due to incorrect use of reference counts. Since the tunnel magic feather being wrong indicates either early free or structure corruption, we can avoid doing more damage by simply leaving the tunnel structure alone. If the tunnel refcount isn't dropped when it should be, the tunnel instance will remain in the kernel, resulting in the tunnel structure and socket leaking. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 50548c61b91e..e723828e458b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1564,10 +1564,12 @@ void l2tp_session_free(struct l2tp_session *session) struct l2tp_tunnel *tunnel = session->tunnel; if (tunnel) { - BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC)) + goto out; l2tp_tunnel_dec_refcount(tunnel); } +out: kfree(session); } EXPORT_SYMBOL_GPL(l2tp_session_free); -- cgit From c8a2983c4df06c4cd11bea6abfa7e2947bd3113b Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 22 Jul 2020 18:17:19 +0200 Subject: udp: Don't discard reuseport selection when group has connections When BPF socket lookup prog selects a socket that belongs to a reuseport group, and the reuseport group has connected sockets in it, the socket selected by reuseport will be discarded, and socket returned by BPF socket lookup will be used instead. Modify this behavior so that the socket selected by reuseport running after BPF socket lookup always gets used. Ignore the fact that the reuseport group might have connections because it is only relevant when scoring sockets during regular hashtable-based lookup. Fixes: 72f7e9440e9b ("udp: Run SK_LOOKUP BPF program on socket lookup") Fixes: 6d4201b1386b ("udp6: Run SK_LOOKUP BPF program on socket lookup") Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Kuniyuki Iwashima Link: https://lore.kernel.org/bpf/20200722161720.940831-2-jakub@cloudflare.com --- net/ipv6/udp.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5530c9dcb61c..c394e674f486 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -155,9 +155,6 @@ static struct sock *lookup_reuseport(struct net *net, struct sock *sk, hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - /* Fall back to scoring if group has connections */ - if (reuseport_has_conns(sk, false)) - return NULL; } return reuse_sk; } -- cgit From 14fc6bd6b79c430f615500d0fe6cea4722110db8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:09 -0700 Subject: bpf: Refactor bpf_iter_reg to have separate seq_info member There is no functionality change for this patch. Struct bpf_iter_reg is used to register a bpf_iter target, which includes information for both prog_load, link_create and seq_file creation. This patch puts fields related seq_file creation into a different structure. This will be useful for map elements iterator where one iterator covers different map types and different map types may have different seq_ops, init/fini private_data function and private_data size. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184109.590030-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 8 ++++++-- net/ipv4/udp.c | 8 ++++++-- net/ipv6/route.c | 8 ++++++-- net/netlink/af_netlink.c | 8 ++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8913923a6c0..cb288fdcf2ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2947,17 +2947,21 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg tcp_reg_info = { - .target = "tcp", +static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, .seq_priv_size = sizeof(struct tcp_iter_state), +}; + +static struct bpf_iter_reg tcp_reg_info = { + .target = "tcp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &tcp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fb5e4ea133f..1bc50ec2caef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3208,17 +3208,21 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg udp_reg_info = { - .target = "udp", +static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, .seq_priv_size = sizeof(struct udp_iter_state), +}; + +static struct bpf_iter_reg udp_reg_info = { + .target = "udp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &udp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 33f5efbad0a9..8bfc57b0802a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6427,17 +6427,21 @@ DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *r BTF_ID_LIST(btf_fib6_info_id) BTF_ID(struct, fib6_info) -static struct bpf_iter_reg ipv6_route_reg_info = { - .target = "ipv6_route", +static const struct bpf_iter_seq_info ipv6_route_seq_info = { .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct ipv6_route_iter), +}; + +static struct bpf_iter_reg ipv6_route_reg_info = { + .target = "ipv6_route", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__ipv6_route, rt), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &ipv6_route_seq_info, }; static int __init bpf_iter_register(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d8921b833744..b5f30d7d30d0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2807,17 +2807,21 @@ static const struct rhashtable_params netlink_rhashtable_params = { BTF_ID_LIST(btf_netlink_sock_id) BTF_ID(struct, netlink_sock) -static struct bpf_iter_reg netlink_reg_info = { - .target = "netlink", +static const struct bpf_iter_seq_info netlink_seq_info = { .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct nl_seq_iter), +}; + +static struct bpf_iter_reg netlink_reg_info = { + .target = "netlink", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__netlink, sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &netlink_seq_info, }; static int __init bpf_iter_register(void) -- cgit From f9c792729581bd8b8473af163e8ab426c2c61d89 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:10 -0700 Subject: bpf: Refactor to provide aux info to bpf_iter_init_seq_priv_t This patch refactored target bpf_iter_init_seq_priv_t callback function to accept additional information. This will be needed in later patches for map element targets since a particular map should be passed to traverse elements for that particular map. In the future, other information may be passed to target as well, e.g., pid, cgroup id, etc. to customize the iterator. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184110.590156-1-yhs@fb.com --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/udp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cb288fdcf2ca..5084333b5ab6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2921,7 +2921,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) -static int bpf_iter_init_tcp(void *priv_data) +static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { struct tcp_iter_state *st = priv_data; struct tcp_seq_afinfo *afinfo; @@ -2933,7 +2933,7 @@ static int bpf_iter_init_tcp(void *priv_data) afinfo->family = AF_UNSPEC; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1bc50ec2caef..7ce31beccfc2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3181,7 +3181,7 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data) +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) { struct udp_iter_state *st = priv_data; struct udp_seq_afinfo *afinfo; @@ -3194,7 +3194,7 @@ static int bpf_iter_init_udp(void *priv_data) afinfo->family = AF_UNSPEC; afinfo->udp_table = &udp_table; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; -- cgit From 5ce6e77c7edf7310a0ff9532fd6b9693c082ab32 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 23 Jul 2020 11:41:16 -0700 Subject: bpf: Implement bpf iterator for sock local storage map The bpf iterator for bpf sock local storage map is implemented. User space interacts with sock local storage map with fd as a key and storage value. In kernel, passing fd to the bpf program does not really make sense. In this case, the sock itself is passed to bpf program. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200723184116.590602-1-yhs@fb.com --- net/core/bpf_sk_storage.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) (limited to 'net') diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 6f921c4ddc2c..eafcd15e7dfd 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1217,3 +1218,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, return err; } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); + +struct bpf_iter_seq_sk_storage_map_info { + struct bpf_map *map; + unsigned int bucket_id; + unsigned skip_elems; +}; + +static struct bpf_sk_storage_elem * +bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, + struct bpf_sk_storage_elem *prev_selem) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + u32 skip_elems = info->skip_elems; + struct bpf_sk_storage_map *smap; + u32 bucket_id = info->bucket_id; + u32 i, count, n_buckets; + struct bucket *b; + + smap = (struct bpf_sk_storage_map *)info->map; + n_buckets = 1U << smap->bucket_log; + if (bucket_id >= n_buckets) + return NULL; + + /* try to find next selem in the same bucket */ + selem = prev_selem; + count = 0; + while (selem) { + selem = hlist_entry_safe(selem->map_node.next, + struct bpf_sk_storage_elem, map_node); + if (!selem) { + /* not found, unlock and go to the next bucket */ + b = &smap->buckets[bucket_id++]; + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + break; + } + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage) { + info->skip_elems = skip_elems + count; + return selem; + } + count++; + } + + for (i = bucket_id; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + raw_spin_lock_bh(&b->lock); + count = 0; + hlist_for_each_entry(selem, &b->list, map_node) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage && count >= skip_elems) { + info->bucket_id = i; + info->skip_elems = count; + return selem; + } + count++; + } + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_sk_storage_elem *selem; + + selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); + if (!selem) + return NULL; + + if (*pos == 0) + ++*pos; + return selem; +} + +static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_sk_storage_map_seq_find_next(seq->private, v); +} + +struct bpf_iter__bpf_sk_storage_map { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(struct sock *, sk); + __bpf_md_ptr(void *, value); +}; + +DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, + struct bpf_map *map, struct sock *sk, + void *value) + +static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_iter__bpf_sk_storage_map ctx = {}; + struct bpf_sk_storage *sk_storage; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, selem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (selem) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + ctx.sk = sk_storage->sk; + ctx.value = SDATA(selem)->data; + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_sk_storage_map_seq_show(seq, v); +} + +static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (!v) { + (void)__bpf_sk_storage_map_seq_show(seq, v); + } else { + smap = (struct bpf_sk_storage_map *)info->map; + b = &smap->buckets[info->bucket_id]; + raw_spin_unlock_bh(&b->lock); + } +} + +static int bpf_iter_init_sk_storage_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + + seq_info->map = aux->map; + return 0; +} + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + struct bpf_map *map = aux->map; + + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + return -EINVAL; + + if (prog->aux->max_rdonly_access > map->value_size) + return -EACCES; + + return 0; +} + +static const struct seq_operations bpf_sk_storage_map_seq_ops = { + .start = bpf_sk_storage_map_seq_start, + .next = bpf_sk_storage_map_seq_next, + .stop = bpf_sk_storage_map_seq_stop, + .show = bpf_sk_storage_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_sk_storage_map_seq_ops, + .init_seq_private = bpf_iter_init_sk_storage_map, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), +}; + +static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { + .target = "bpf_sk_storage_map", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), + PTR_TO_BTF_ID_OR_NULL }, + { offsetof(struct bpf_iter__bpf_sk_storage_map, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, + .seq_info = &iter_seq_info, +}; + +static int __init bpf_sk_storage_map_iter_init(void) +{ + bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = + btf_sock_ids[BTF_SOCK_TYPE_SOCK]; + return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); +} +late_initcall(bpf_sk_storage_map_iter_init); -- cgit From 7f0a838254bdd9114b978ef2541a6ce330307e9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:55 -0700 Subject: bpf, xdp: Maintain info on attached XDP BPF programs in net_device Instead of delegating to drivers, maintain information about which BPF programs are attached in which XDP modes (generic/skb, driver, or hardware) locally in net_device. This effectively obsoletes XDP_QUERY_PROG command. Such re-organization simplifies existing code already. But it also allows to further add bpf_link-based XDP attachments without drivers having to know about any of this at all, which seems like a good setup. XDP_SETUP_PROG/XDP_SETUP_PROG_HW are just low-level commands to driver to install/uninstall active BPF program. All the higher-level concerns about prog/link interaction will be contained within generic driver-agnostic logic. All the XDP_QUERY_PROG calls to driver in dev_xdp_uninstall() were removed. It's not clear for me why dev_xdp_uninstall() were passing previous prog_flags when resetting installed programs. That seems unnecessary, plus most drivers don't populate prog_flags anyways. Having XDP_SETUP_PROG vs XDP_SETUP_PROG_HW should be enough of an indicator of what is required of driver to correctly reset active BPF program. dev_xdp_uninstall() is also generalized as an iteration over all three supported mode. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-3-andriin@fb.com --- net/core/dev.c | 158 ++++++++++++++++++++++++++++----------------------- net/core/rtnetlink.c | 5 +- 2 files changed, 90 insertions(+), 73 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fe2e387eed29..bf38fde667e9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8716,84 +8716,103 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down_generic); -u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, - enum bpf_netdev_command cmd) +static enum bpf_xdp_mode dev_xdp_mode(u32 flags) { - struct netdev_bpf xdp; + if (flags & XDP_FLAGS_HW_MODE) + return XDP_MODE_HW; + if (flags & XDP_FLAGS_DRV_MODE) + return XDP_MODE_DRV; + return XDP_MODE_SKB; +} - if (!bpf_op) - return 0; +static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) +{ + switch (mode) { + case XDP_MODE_SKB: + return generic_xdp_install; + case XDP_MODE_DRV: + case XDP_MODE_HW: + return dev->netdev_ops->ndo_bpf; + default: + return NULL; + }; +} - memset(&xdp, 0, sizeof(xdp)); - xdp.command = cmd; +static struct bpf_prog *dev_xdp_prog(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + return dev->xdp_state[mode].prog; +} + +u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) +{ + struct bpf_prog *prog = dev_xdp_prog(dev, mode); - /* Query must always succeed. */ - WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG); + return prog ? prog->aux->id : 0; +} - return xdp.prog_id; +static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_prog *prog) +{ + dev->xdp_state[mode].prog = prog; } -static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, - struct netlink_ext_ack *extack, u32 flags, - struct bpf_prog *prog) +static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, + bpf_op_t bpf_op, struct netlink_ext_ack *extack, + u32 flags, struct bpf_prog *prog) { - bool non_hw = !(flags & XDP_FLAGS_HW_MODE); - struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; int err; - if (non_hw) { - prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, - XDP_QUERY_PROG)); - if (IS_ERR(prev_prog)) - prev_prog = NULL; - } - memset(&xdp, 0, sizeof(xdp)); - if (flags & XDP_FLAGS_HW_MODE) - xdp.command = XDP_SETUP_PROG_HW; - else - xdp.command = XDP_SETUP_PROG; + xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; xdp.flags = flags; xdp.prog = prog; + /* Drivers assume refcnt is already incremented (i.e, prog pointer is + * "moved" into driver), so they don't increment it on their own, but + * they do decrement refcnt when program is detached or replaced. + * Given net_device also owns link/prog, we need to bump refcnt here + * to prevent drivers from underflowing it. + */ + if (prog) + bpf_prog_inc(prog); err = bpf_op(dev, &xdp); - if (!err && non_hw) - bpf_prog_change_xdp(prev_prog, prog); + if (err) { + if (prog) + bpf_prog_put(prog); + return err; + } - if (prev_prog) - bpf_prog_put(prev_prog); + if (mode != XDP_MODE_HW) + bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog); - return err; + return 0; } static void dev_xdp_uninstall(struct net_device *dev) { - struct netdev_bpf xdp; - bpf_op_t ndo_bpf; + struct bpf_prog *prog; + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; - /* Remove generic XDP */ - WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + ASSERT_RTNL(); - /* Remove from the driver */ - ndo_bpf = dev->netdev_ops->ndo_bpf; - if (!ndo_bpf) - return; + for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) { + prog = dev_xdp_prog(dev, mode); + if (!prog) + continue; - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - WARN_ON(ndo_bpf(dev, &xdp)); - if (xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) + continue; - /* Remove HW offload */ - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG_HW; - if (!ndo_bpf(dev, &xdp) && xdp.prog_id) - WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, - NULL)); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + + bpf_prog_put(prog); + dev_xdp_set_prog(dev, mode, NULL); + } } /** @@ -8810,29 +8829,22 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { const struct net_device_ops *ops = dev->netdev_ops; - enum bpf_netdev_command query; + enum bpf_xdp_mode mode = dev_xdp_mode(flags); + bool offload = mode == XDP_MODE_HW; u32 prog_id, expected_id = 0; - bpf_op_t bpf_op, bpf_chk; struct bpf_prog *prog; - bool offload; + bpf_op_t bpf_op; int err; ASSERT_RTNL(); - offload = flags & XDP_FLAGS_HW_MODE; - query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; - - bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) { NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); return -EOPNOTSUPP; } - if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) - bpf_op = generic_xdp_install; - if (bpf_op == bpf_chk) - bpf_chk = generic_xdp_install; - prog_id = __dev_xdp_query(dev, bpf_op, query); + prog_id = dev_xdp_prog_id(dev, mode); if (flags & XDP_FLAGS_REPLACE) { if (expected_fd >= 0) { prog = bpf_prog_get_type_dev(expected_fd, @@ -8850,8 +8862,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, } } if (fd >= 0) { - if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { - NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); + enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB + ? XDP_MODE_DRV : XDP_MODE_SKB; + + if (!offload && dev_xdp_prog_id(dev, other_mode)) { + NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; } @@ -8866,7 +8881,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return PTR_ERR(prog); if (!offload && bpf_prog_is_dev_bound(prog->aux)) { - NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); + NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); bpf_prog_put(prog); return -EINVAL; } @@ -8895,11 +8910,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, prog = NULL; } - err = dev_xdp_install(dev, bpf_op, extack, flags, prog); - if (err < 0 && prog) + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, prog); + if (err < 0 && prog) { bpf_prog_put(prog); + return err; + } + dev_xdp_set_prog(dev, mode, prog); - return err; + return 0; } /** diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 85a4b0101f76..58c484a28395 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1416,13 +1416,12 @@ static u32 rtnl_xdp_prog_skb(struct net_device *dev) static u32 rtnl_xdp_prog_drv(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG); + return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { - return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, - XDP_QUERY_PROG_HW); + return dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, -- cgit From d4baa9368a5e4d694e787e0442ddd6ab95d6fd96 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:56 -0700 Subject: bpf, xdp: Extract common XDP program attachment logic Further refactor XDP attachment code. dev_change_xdp_fd() is split into two parts: getting bpf_progs from FDs and attachment logic, working with bpf_progs. This makes attachment logic a bit more straightforward and prepares code for bpf_xdp_link inclusion, which will share the common logic. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-4-andriin@fb.com --- net/core/dev.c | 165 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 91 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index bf38fde667e9..521ce031ee35 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8815,111 +8815,128 @@ static void dev_xdp_uninstall(struct net_device *dev) } } -/** - * dev_change_xdp_fd - set or clear a bpf program for a device rx path - * @dev: device - * @extack: netlink extended ack - * @fd: new program fd or negative value to clear - * @expected_fd: old program fd that userspace expects to replace or clear - * @flags: xdp-related flags - * - * Set or clear a bpf program for a device - */ -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, int expected_fd, u32 flags) +static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, + struct bpf_prog *new_prog, struct bpf_prog *old_prog, + u32 flags) { - const struct net_device_ops *ops = dev->netdev_ops; - enum bpf_xdp_mode mode = dev_xdp_mode(flags); - bool offload = mode == XDP_MODE_HW; - u32 prog_id, expected_id = 0; - struct bpf_prog *prog; + struct bpf_prog *cur_prog; + enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; ASSERT_RTNL(); - bpf_op = dev_xdp_bpf_op(dev, mode); - if (!bpf_op) { - NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); - return -EOPNOTSUPP; + /* just one XDP mode bit should be set, zero defaults to SKB mode */ + if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); + return -EINVAL; + } + /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ + if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { + NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); + return -EINVAL; } - prog_id = dev_xdp_prog_id(dev, mode); - if (flags & XDP_FLAGS_REPLACE) { - if (expected_fd >= 0) { - prog = bpf_prog_get_type_dev(expected_fd, - BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - expected_id = prog->aux->id; - bpf_prog_put(prog); - } - - if (prog_id != expected_id) { - NL_SET_ERR_MSG(extack, "Active program does not match expected"); - return -EEXIST; - } + mode = dev_xdp_mode(flags); + cur_prog = dev_xdp_prog(dev, mode); + if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { + NL_SET_ERR_MSG(extack, "Active program does not match expected"); + return -EEXIST; } - if (fd >= 0) { + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; + } + + if (new_prog) { + bool offload = mode == XDP_MODE_HW; enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB ? XDP_MODE_DRV : XDP_MODE_SKB; - if (!offload && dev_xdp_prog_id(dev, other_mode)) { + if (!offload && dev_xdp_prog(dev, other_mode)) { NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; } - - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } - - prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!offload && bpf_prog_is_dev_bound(prog->aux)) { + if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) { NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported"); - bpf_prog_put(prog); return -EINVAL; } - - if (prog->expected_attach_type == BPF_XDP_DEVMAP) { + if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); - bpf_prog_put(prog); return -EINVAL; } - - if (prog->expected_attach_type == BPF_XDP_CPUMAP) { - NL_SET_ERR_MSG(extack, - "BPF_XDP_CPUMAP programs can not be attached to a device"); - bpf_prog_put(prog); + if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) { + NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs can not be attached to a device"); return -EINVAL; } + } - /* prog->aux->id may be 0 for orphaned device-bound progs */ - if (prog->aux->id && prog->aux->id == prog_id) { - bpf_prog_put(prog); - return 0; + /* don't call drivers if the effective program didn't change */ + if (new_prog != cur_prog) { + bpf_op = dev_xdp_bpf_op(dev, mode); + if (!bpf_op) { + NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode"); + return -EOPNOTSUPP; } - } else { - if (!prog_id) - return 0; - prog = NULL; - } - err = dev_xdp_install(dev, mode, bpf_op, extack, flags, prog); - if (err < 0 && prog) { - bpf_prog_put(prog); - return err; + err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog); + if (err) + return err; } - dev_xdp_set_prog(dev, mode, prog); + + dev_xdp_set_prog(dev, mode, new_prog); + if (cur_prog) + bpf_prog_put(cur_prog); return 0; } +/** + * dev_change_xdp_fd - set or clear a bpf program for a device rx path + * @dev: device + * @extack: netlink extended ack + * @fd: new program fd or negative value to clear + * @expected_fd: old program fd that userspace expects to replace or clear + * @flags: xdp-related flags + * + * Set or clear a bpf program for a device + */ +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, int expected_fd, u32 flags) +{ + enum bpf_xdp_mode mode = dev_xdp_mode(flags); + struct bpf_prog *new_prog = NULL, *old_prog = NULL; + int err; + + ASSERT_RTNL(); + + if (fd >= 0) { + new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + } + + if (expected_fd >= 0) { + old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP, + mode != XDP_MODE_SKB); + if (IS_ERR(old_prog)) { + err = PTR_ERR(old_prog); + old_prog = NULL; + goto err_out; + } + } + + err = dev_xdp_attach(dev, extack, new_prog, old_prog, flags); + +err_out: + if (err && new_prog) + bpf_prog_put(new_prog); + if (old_prog) + bpf_prog_put(old_prog); + return err; +} + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace -- cgit From aa8d3a716b59db6c1ad6c68fb8aa05e31980da60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:57 -0700 Subject: bpf, xdp: Add bpf_link-based XDP attachment API Add bpf_link-based API (bpf_xdp_link) to attach BPF XDP program through BPF_LINK_CREATE command. bpf_xdp_link is mutually exclusive with direct BPF program attachment, previous BPF program should be detached prior to attempting to create a new bpf_xdp_link attachment (for a given XDP mode). Once BPF link is attached, it can't be replaced by other BPF program attachment or link attachment. It will be detached only when the last BPF link FD is closed. bpf_xdp_link will be auto-detached when net_device is shutdown, similarly to how other BPF links behave (cgroup, flow_dissector). At that point bpf_link will become defunct, but won't be destroyed until last FD is closed. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-5-andriin@fb.com --- net/core/dev.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 163 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 521ce031ee35..e24248f3d675 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8716,6 +8716,12 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down_generic); +struct bpf_xdp_link { + struct bpf_link link; + struct net_device *dev; /* protected by rtnl_lock, no refcnt held */ + int flags; +}; + static enum bpf_xdp_mode dev_xdp_mode(u32 flags) { if (flags & XDP_FLAGS_HW_MODE) @@ -8738,9 +8744,19 @@ static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) }; } +static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev, + enum bpf_xdp_mode mode) +{ + return dev->xdp_state[mode].link; +} + static struct bpf_prog *dev_xdp_prog(struct net_device *dev, enum bpf_xdp_mode mode) { + struct bpf_xdp_link *link = dev_xdp_link(dev, mode); + + if (link) + return link->link.prog; return dev->xdp_state[mode].prog; } @@ -8751,9 +8767,17 @@ u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) return prog ? prog->aux->id : 0; } +static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode, + struct bpf_xdp_link *link) +{ + dev->xdp_state[mode].link = link; + dev->xdp_state[mode].prog = NULL; +} + static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode, struct bpf_prog *prog) { + dev->xdp_state[mode].link = NULL; dev->xdp_state[mode].prog = prog; } @@ -8793,6 +8817,7 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, static void dev_xdp_uninstall(struct net_device *dev) { + struct bpf_xdp_link *link; struct bpf_prog *prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; @@ -8810,14 +8835,20 @@ static void dev_xdp_uninstall(struct net_device *dev) WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); - bpf_prog_put(prog); - dev_xdp_set_prog(dev, mode, NULL); + /* auto-detach link from net device */ + link = dev_xdp_link(dev, mode); + if (link) + link->dev = NULL; + else + bpf_prog_put(prog); + + dev_xdp_set_link(dev, mode, NULL); } } static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack, - struct bpf_prog *new_prog, struct bpf_prog *old_prog, - u32 flags) + struct bpf_xdp_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog, u32 flags) { struct bpf_prog *cur_prog; enum bpf_xdp_mode mode; @@ -8826,6 +8857,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack ASSERT_RTNL(); + /* either link or prog attachment, never both */ + if (link && (new_prog || old_prog)) + return -EINVAL; + /* link supports only XDP mode flags */ + if (link && (flags & ~XDP_FLAGS_MODES)) { + NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); + return -EINVAL; + } /* just one XDP mode bit should be set, zero defaults to SKB mode */ if (hweight32(flags & XDP_FLAGS_MODES) > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); @@ -8838,7 +8877,18 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack } mode = dev_xdp_mode(flags); + /* can't replace attached link */ + if (dev_xdp_link(dev, mode)) { + NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); + return -EBUSY; + } + cur_prog = dev_xdp_prog(dev, mode); + /* can't replace attached prog with link */ + if (link && cur_prog) { + NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link"); + return -EBUSY; + } if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) { NL_SET_ERR_MSG(extack, "Active program does not match expected"); return -EEXIST; @@ -8848,6 +8898,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EBUSY; } + /* put effective new program into new_prog */ + if (link) + new_prog = link->link.prog; + if (new_prog) { bool offload = mode == XDP_MODE_HW; enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB @@ -8884,13 +8938,116 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return err; } - dev_xdp_set_prog(dev, mode, new_prog); + if (link) + dev_xdp_set_link(dev, mode, link); + else + dev_xdp_set_prog(dev, mode, new_prog); if (cur_prog) bpf_prog_put(cur_prog); return 0; } +static int dev_xdp_attach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags); +} + +static int dev_xdp_detach_link(struct net_device *dev, + struct netlink_ext_ack *extack, + struct bpf_xdp_link *link) +{ + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + + ASSERT_RTNL(); + + mode = dev_xdp_mode(link->flags); + if (dev_xdp_link(dev, mode) != link) + return -EINVAL; + + bpf_op = dev_xdp_bpf_op(dev, mode); + WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL)); + dev_xdp_set_link(dev, mode, NULL); + return 0; +} + +static void bpf_xdp_link_release(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + rtnl_lock(); + + /* if racing with net_device's tear down, xdp_link->dev might be + * already NULL, in which case link was already auto-detached + */ + if (xdp_link->dev) + WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + + rtnl_unlock(); +} + +static void bpf_xdp_link_dealloc(struct bpf_link *link) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + + kfree(xdp_link); +} + +static const struct bpf_link_ops bpf_xdp_link_lops = { + .release = bpf_xdp_link_release, + .dealloc = bpf_xdp_link_dealloc, +}; + +int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_xdp_link *link; + struct net_device *dev; + int err, fd; + + dev = dev_get_by_index(net, attr->link_create.target_ifindex); + if (!dev) + return -EINVAL; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out_put_dev; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); + link->dev = dev; + link->flags = attr->link_create.flags; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + goto out_put_dev; + } + + rtnl_lock(); + err = dev_xdp_attach_link(dev, NULL, link); + rtnl_unlock(); + + if (err) { + bpf_link_cleanup(&link_primer); + goto out_put_dev; + } + + fd = bpf_link_settle(&link_primer); + /* link itself doesn't hold dev's refcnt to not complicate shutdown */ + dev_put(dev); + return fd; + +out_put_dev: + dev_put(dev); + return err; +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -8927,7 +9084,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, } } - err = dev_xdp_attach(dev, extack, new_prog, old_prog, flags); + err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags); err_out: if (err && new_prog) -- cgit From 026a4c28e1db3b0cb99cd9a3e495d4a8b632fa74 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:58 -0700 Subject: bpf, xdp: Implement LINK_UPDATE for BPF XDP link Add support for LINK_UPDATE command for BPF XDP link to enable reliable replacement of underlying BPF program. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-6-andriin@fb.com --- net/core/dev.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index e24248f3d675..49f284f51a22 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8996,9 +8996,52 @@ static void bpf_xdp_link_dealloc(struct bpf_link *link) kfree(xdp_link); } +static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + enum bpf_xdp_mode mode; + bpf_op_t bpf_op; + int err = 0; + + rtnl_lock(); + + /* link might have been auto-released already, so fail */ + if (!xdp_link->dev) { + err = -ENOLINK; + goto out_unlock; + } + + if (old_prog && link->prog != old_prog) { + err = -EPERM; + goto out_unlock; + } + old_prog = link->prog; + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog); + goto out_unlock; + } + + mode = dev_xdp_mode(xdp_link->flags); + bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); + err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, + xdp_link->flags, new_prog); + if (err) + goto out_unlock; + + old_prog = xchg(&link->prog, new_prog); + bpf_prog_put(old_prog); + +out_unlock: + rtnl_unlock(); + return err; +} + static const struct bpf_link_ops bpf_xdp_link_lops = { .release = bpf_xdp_link_release, .dealloc = bpf_xdp_link_dealloc, + .update_prog = bpf_xdp_link_update, }; int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) -- cgit From c1931c9784ebb5787c0784c112fb8baa5e8455b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:45:59 -0700 Subject: bpf: Implement BPF XDP link-specific introspection APIs Implement XDP link-specific show_fdinfo and link_info to emit ifindex. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-7-andriin@fb.com --- net/core/dev.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 49f284f51a22..82ce0920b172 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8996,6 +8996,35 @@ static void bpf_xdp_link_dealloc(struct bpf_link *link) kfree(xdp_link); } +static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + seq_printf(seq, "ifindex:\t%u\n", ifindex); +} + +static int bpf_xdp_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); + u32 ifindex = 0; + + rtnl_lock(); + if (xdp_link->dev) + ifindex = xdp_link->dev->ifindex; + rtnl_unlock(); + + info->xdp.ifindex = ifindex; + return 0; +} + static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog) { @@ -9041,6 +9070,8 @@ out_unlock: static const struct bpf_link_ops bpf_xdp_link_lops = { .release = bpf_xdp_link_release, .dealloc = bpf_xdp_link_dealloc, + .show_fdinfo = bpf_xdp_link_show_fdinfo, + .fill_link_info = bpf_xdp_link_fill_link_info, .update_prog = bpf_xdp_link_update, }; -- cgit From e8407fdeb9a6866784e249881f6c786a0835faba Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 21 Jul 2020 23:46:02 -0700 Subject: bpf, xdp: Remove XDP_QUERY_PROG and XDP_QUERY_PROG_HW XDP commands Now that BPF program/link management is centralized in generic net_device code, kernel code never queries program id from drivers, so XDP_QUERY_PROG/XDP_QUERY_PROG_HW commands are unnecessary. This patch removes all the implementations of those commands in kernel, along the xdp_attachment_query(). This patch was compile-tested on allyesconfig. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200722064603.3350758-10-andriin@fb.com --- net/core/dev.c | 4 ---- net/core/xdp.c | 9 --------- 2 files changed, 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 82ce0920b172..a2a57988880a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5468,10 +5468,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) } break; - case XDP_QUERY_PROG: - xdp->prog_id = old ? old->aux->id : 0; - break; - default: ret = -EINVAL; break; diff --git a/net/core/xdp.c b/net/core/xdp.c index 3c45f99e26d5..48aba933a5a8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -400,15 +400,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -int xdp_attachment_query(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - bpf->prog_id = info->prog ? info->prog->aux->id : 0; - bpf->prog_flags = info->prog ? info->flags : 0; - return 0; -} -EXPORT_SYMBOL_GPL(xdp_attachment_query); - bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { -- cgit From 72b7f6c48708e4524765a2f1316063207d8e0cd5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Sun, 26 Jul 2020 20:34:28 +0200 Subject: net/smc: unique reason code for exceeded max dmb count When the maximum dmb buffer limit for an ism device is reached no more dmb buffers can be registered. When this happens the reason code is set to SMC_CLC_DECL_MEM indicating out-of-memory. This is the same reason code that is used when no memory could be allocated for the new dmb buffer. This is confusing for users when they see this error but there is more memory available. To solve this set a separate new reason code when the maximum dmb limit exceeded. Reviewed-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 13 +++++++++---- net/smc/smc_clc.h | 1 + net/smc/smc_core.c | 4 ++-- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 832e36269b10..e7649bbc2b87 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -719,8 +719,11 @@ static int smc_connect_ism(struct smc_sock *smc, } /* Create send and receive buffers */ - if (smc_buf_create(smc, true)) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, + rc = smc_buf_create(smc, true); + if (rc) + return smc_connect_abort(smc, (rc == -ENOSPC) ? + SMC_CLC_DECL_MAX_DMB : + SMC_CLC_DECL_MEM, ini->cln_first_contact); smc_conn_save_peer_info(smc, aclc); @@ -1200,12 +1203,14 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, } /* Create send and receive buffers */ - if (smc_buf_create(new_smc, true)) { + rc = smc_buf_create(new_smc, true); + if (rc) { if (ini->cln_first_contact == SMC_FIRST_CONTACT) smc_lgr_cleanup_early(&new_smc->conn); else smc_conn_free(&new_smc->conn); - return SMC_CLC_DECL_MEM; + return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : + SMC_CLC_DECL_MEM; } return 0; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 76c2b150d040..cf7b45306f4e 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -48,6 +48,7 @@ #define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ #define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */ #define SMC_CLC_DECL_VERSMISMAT 0x030c0000 /* SMC version mismatch */ +#define SMC_CLC_DECL_MAX_DMB 0x030d0000 /* SMC-D DMB limit exceeded */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f82a2e599917..b42fa3b00d00 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1614,7 +1614,7 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); if (rc) { kfree(buf_desc); - return ERR_PTR(-EAGAIN); + return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc); } buf_desc->pages = virt_to_page(buf_desc->cpu_addr); /* CDC header stored in buf. So, pretend it was smaller */ @@ -1688,7 +1688,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) } if (IS_ERR(buf_desc)) - return -ENOMEM; + return PTR_ERR(buf_desc); if (!is_smcd) { if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { -- cgit From 73cb11933c414b744193d9de660010082bc0a944 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 27 Jul 2020 10:18:34 +0300 Subject: ipmr: Copy option to correct variable Cited commit mistakenly copied provided option to 'val' instead of to 'mfc': ``` - if (copy_from_user(&mfc, optval, sizeof(mfc))) { + if (copy_from_sockptr(&val, optval, sizeof(val))) { ``` Fix this by copying the option to 'mfc'. selftest router_multicast.sh before: $ ./router_multicast.sh smcroutectl: Unknown or malformed IPC message 'a' from client. smcroutectl: failed removing multicast route, does not exist. TEST: mcast IPv4 [FAIL] Multicast not received on first host TEST: mcast IPv6 [ OK ] smcroutectl: Unknown or malformed IPC message 'a' from client. smcroutectl: failed removing multicast route, does not exist. TEST: RPF IPv4 [FAIL] Multicast not received on first host TEST: RPF IPv6 [ OK ] selftest router_multicast.sh after: $ ./router_multicast.sh TEST: mcast IPv4 [ OK ] TEST: mcast IPv6 [ OK ] TEST: RPF IPv4 [ OK ] TEST: RPF IPv6 [ OK ] Fixes: 01ccb5b48f08 ("net/ipv4: switch ip_mroute_setsockopt to sockptr_t") Signed-off-by: Ido Schimmel Reviewed-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index cdf3a40f9ff5..876fd6ff1ff9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1441,7 +1441,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, ret = -EINVAL; break; } - if (copy_from_sockptr(&val, optval, sizeof(val))) { + if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) { ret = -EFAULT; break; } -- cgit From 350e7ab92da8420fe97e1edf040db8c9bab750f8 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 27 Jul 2020 13:09:19 +0530 Subject: net: Removed the device type check to add mpls support for devices MPLS has no dependency with the device type of underlying devices. Hence the device type check to add mpls support for devices can be avoided. Signed-off-by: Martin Varghese Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index fd30ea61336e..6fdd0c9f865a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1584,21 +1584,10 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, unsigned int flags; if (event == NETDEV_REGISTER) { + mdev = mpls_add_dev(dev); + if (IS_ERR(mdev)) + return notifier_from_errno(PTR_ERR(mdev)); - /* For now just support Ethernet, IPGRE, IP6GRE, SIT and - * IPIP devices - */ - if (dev->type == ARPHRD_ETHER || - dev->type == ARPHRD_LOOPBACK || - dev->type == ARPHRD_IPGRE || - dev->type == ARPHRD_IP6GRE || - dev->type == ARPHRD_SIT || - dev->type == ARPHRD_TUNNEL || - dev->type == ARPHRD_TUNNEL6) { - mdev = mpls_add_dev(dev); - if (IS_ERR(mdev)) - return notifier_from_errno(PTR_ERR(mdev)); - } return NOTIFY_OK; } -- cgit From 8f4c0e01789c18674acdf17cae3822b3dc3db715 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:16 -0400 Subject: hsr: enhance netlink socket interface to support PRP Parallel Redundancy Protocol (PRP) is another redundancy protocol introduced by IEC 63439 standard. It is similar to HSR in many aspects:- - Use a pair of Ethernet interfaces to created the PRP device - Use a 6 byte redundancy protocol part (RCT, Redundancy Check Trailer) similar to HSR Tag. - Has Link Redundancy Entity (LRE) that works with RCT to implement redundancy. Key difference is that the protocol unit is a trailer instead of a prefix as in HSR. That makes it inter-operable with tradition network components such as bridges/switches which treat it as pad bytes, whereas HSR nodes requires some kind of translators (Called redbox) to talk to regular network devices. This features allows regular linux box to be converted to a DAN-P box. DAN-P stands for Dual Attached Node - PRP similar to DAN-H (Dual Attached Node - HSR). Add a comment at the header/source code to explicitly state that the driver files also handles PRP protocol as well. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/Kconfig | 35 +++++++++++++++++++++++------------ net/hsr/hsr_debugfs.c | 2 +- net/hsr/hsr_device.c | 7 +++++-- net/hsr/hsr_device.h | 2 ++ net/hsr/hsr_forward.c | 2 ++ net/hsr/hsr_forward.h | 2 ++ net/hsr/hsr_framereg.c | 1 + net/hsr/hsr_framereg.h | 2 ++ net/hsr/hsr_main.c | 2 ++ net/hsr/hsr_main.h | 11 ++++++++++- net/hsr/hsr_netlink.c | 38 +++++++++++++++++++++++++++++++------- net/hsr/hsr_netlink.h | 2 ++ net/hsr/hsr_slave.c | 2 ++ net/hsr/hsr_slave.h | 2 ++ 14 files changed, 87 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig index 8095b034e76e..1b048c17b6c8 100644 --- a/net/hsr/Kconfig +++ b/net/hsr/Kconfig @@ -4,24 +4,35 @@ # config HSR - tristate "High-availability Seamless Redundancy (HSR)" + tristate "High-availability Seamless Redundancy (HSR & PRP)" help + This enables IEC 62439 defined High-availability Seamless + Redundancy (HSR) and Parallel Redundancy Protocol (PRP). + If you say Y here, then your Linux box will be able to act as a - DANH ("Doubly attached node implementing HSR"). For this to work, - your Linux box needs (at least) two physical Ethernet interfaces, - and it must be connected as a node in a ring network together with - other HSR capable nodes. + DANH ("Doubly attached node implementing HSR") or DANP ("Doubly + attached node implementing PRP"). For this to work, your Linux box + needs (at least) two physical Ethernet interfaces. + + For DANH, it must be connected as a node in a ring network together + with other HSR capable nodes. All Ethernet frames sent over the HSR + device will be sent in both directions on the ring (over both slave + ports), giving a redundant, instant fail-over network. Each HSR node + in the ring acts like a bridge for HSR frames, but filters frames + that have been forwarded earlier. - All Ethernet frames sent over the hsr device will be sent in both - directions on the ring (over both slave ports), giving a redundant, - instant fail-over network. Each HSR node in the ring acts like a - bridge for HSR frames, but filters frames that have been forwarded - earlier. + For DANP, it must be connected as a node connecting to two + separate networks over the two slave interfaces. Like HSR, Ethernet + frames sent over the PRP device will be sent to both networks giving + a redundant, instant fail-over network. Unlike HSR, PRP networks + can have Singly Attached Nodes (SAN) such as PC, printer, bridges + etc and will be able to communicate with DANP nodes. This code is a "best effort" to comply with the HSR standard as described in IEC 62439-3:2010 (HSRv0) and IEC 62439-3:2012 (HSRv1), - but no compliancy tests have been made. Use iproute2 to select - the version you desire. + and PRP standard described in IEC 62439-4:2012 (PRP), but no + compliancy tests have been made. Use iproute2 to select the protocol + you would like to use. You need to perform any and all necessary tests yourself before relying on this code in a safety critical system! diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 9787ef11ca71..c1932c0a15be 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -1,5 +1,5 @@ /* - * hsr_debugfs code + * debugfs code for HSR & PRP * Copyright (C) 2019 Texas Instruments Incorporated * * Author(s): diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 8a927b647829..40ac45123a62 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -3,9 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se - * * This file contains device methods for creating, using and destroying - * virtual HSR devices. + * virtual HSR or PRP devices. */ #include @@ -427,6 +426,10 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); + /* currently PRP is not supported */ + if (protocol_version == PRP_V1) + return -EPROTONOSUPPORT; + /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index b8f9262ed101..868373822ee4 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_DEVICE_H diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ab8dca0c0b65..55adb4dbd235 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * Frame router for HSR and PRP. */ #include "hsr_forward.h" diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index 51a69295566c..b2a6fa319d94 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_FORWARD_H diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 530de24b1fb5..13b2190e6556 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -8,6 +8,7 @@ * interface. A frame is identified by its source MAC address and its HSR * sequence number. This code keeps track of senders and their sequence numbers * to allow filtering of duplicate frames, and to detect HSR ring errors. + * Same code handles filtering of duplicates for PRP as well. */ #include diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 0f0fa12b4329..c06447780d05 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_FRAMEREG_H diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 144da15f0a81..2fd1976e5b1c 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * Event handling for HSR and PRP devices. */ #include diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index f74193465bf5..8cf10d67d5f9 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_PRIVATE_H @@ -131,6 +133,13 @@ struct hsr_port { enum hsr_port_type type; }; +/* used by driver internally to differentiate various protocols */ +enum hsr_version { + HSR_V0 = 0, + HSR_V1, + PRP_V1, +}; + struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; @@ -141,7 +150,7 @@ struct hsr_priv { int announce_count; u16 sequence_nr; u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */ - u8 prot_version; /* Indicate if HSRv0 or HSRv1. */ + enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ unsigned char sup_multicast_addr[ETH_ALEN]; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 6e14b7d22639..06c3cd988760 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -4,7 +4,7 @@ * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se * - * Routines for handling Netlink messages for HSR. + * Routines for handling Netlink messages for HSR and PRP. */ #include "hsr_netlink.h" @@ -22,6 +22,7 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_VERSION] = { .type = NLA_U8 }, [IFLA_HSR_SUPERVISION_ADDR] = { .len = ETH_ALEN }, [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, + [IFLA_HSR_PROTOCOL] = { .type = NLA_U8 }, }; /* Here, it seems a netdevice has already been allocated for us, and the @@ -31,8 +32,10 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + enum hsr_version proto_version; + unsigned char multicast_spec; + u8 proto = HSR_PROTOCOL_HSR; struct net_device *link[2]; - unsigned char multicast_spec, hsr_version; if (!data) { NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); @@ -69,18 +72,34 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, else multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]); + if (data[IFLA_HSR_PROTOCOL]) + proto = nla_get_u8(data[IFLA_HSR_PROTOCOL]); + + if (proto >= HSR_PROTOCOL_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol\n"); + return -EINVAL; + } + if (!data[IFLA_HSR_VERSION]) { - hsr_version = 0; + proto_version = HSR_V0; } else { - hsr_version = nla_get_u8(data[IFLA_HSR_VERSION]); - if (hsr_version > 1) { + if (proto == HSR_PROTOCOL_PRP) { + NL_SET_ERR_MSG_MOD(extack, "PRP version unsupported\n"); + return -EINVAL; + } + + proto_version = nla_get_u8(data[IFLA_HSR_VERSION]); + if (proto_version > HSR_V1) { NL_SET_ERR_MSG_MOD(extack, - "Only versions 0..1 are supported"); + "Only HSR version 0/1 supported\n"); return -EINVAL; } } - return hsr_dev_finalize(dev, link, multicast_spec, hsr_version, extack); + if (proto == HSR_PROTOCOL_PRP) + proto_version = PRP_V1; + + return hsr_dev_finalize(dev, link, multicast_spec, proto_version, extack); } static void hsr_dellink(struct net_device *dev, struct list_head *head) @@ -102,6 +121,7 @@ static void hsr_dellink(struct net_device *dev, struct list_head *head) static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct hsr_priv *hsr = netdev_priv(dev); + u8 proto = HSR_PROTOCOL_HSR; struct hsr_port *port; port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); @@ -120,6 +140,10 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) hsr->sup_multicast_addr) || nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr)) goto nla_put_failure; + if (hsr->prot_version == PRP_V1) + proto = HSR_PROTOCOL_PRP; + if (nla_put_u8(skb, IFLA_HSR_PROTOCOL, proto)) + goto nla_put_failure; return 0; diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h index 1121bb192a18..501552d9753b 100644 --- a/net/hsr/hsr_netlink.h +++ b/net/hsr/hsr_netlink.h @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_NETLINK_H diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 25b6ffba26cd..b5c0834de338 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -3,6 +3,8 @@ * * Author(s): * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * Frame handler other utility functions for HSR and PRP. */ #include "hsr_slave.h" diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h index 8953ea279ce9..9708a4f0ec09 100644 --- a/net/hsr/hsr_slave.h +++ b/net/hsr/hsr_slave.h @@ -2,6 +2,8 @@ /* Copyright 2011-2014 Autronica Fire and Security AS * * 2011-2014 Arvid Brodin, arvid.brodin@alten.se + * + * include file for HSR and PRP. */ #ifndef __HSR_SLAVE_H -- cgit From 121c33b07b3127f501b366bc23d2a590e2f2b8ef Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:17 -0400 Subject: net: hsr: introduce common code for skb initialization As a preparatory patch to introduce PRP protocol support in the driver, refactor the skb init code to a separate function. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 40ac45123a62..bfe2d1449dbd 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -230,15 +230,11 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; -static void send_hsr_supervision_frame(struct hsr_port *master, - u8 type, u8 hsr_ver) +static struct sk_buff *hsr_init_skb(struct hsr_port *master, u8 hsr_ver) { + struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; int hlen, tlen; - struct hsr_tag *hsr_tag; - struct hsr_sup_tag *hsr_stag; - struct hsr_sup_payload *hsr_sp; - unsigned long irqflags; hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; @@ -247,22 +243,44 @@ static void send_hsr_supervision_frame(struct hsr_port *master, sizeof(struct hsr_sup_payload) + hlen + tlen); if (!skb) - return; + return skb; skb_reserve(skb, hlen); - skb->dev = master->dev; skb->protocol = htons(hsr_ver ? ETH_P_HSR : ETH_P_PRP); skb->priority = TC_PRIO_CONTROL; if (dev_hard_header(skb, skb->dev, (hsr_ver ? ETH_P_HSR : ETH_P_PRP), - master->hsr->sup_multicast_addr, + hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); + return skb; +out: + kfree_skb(skb); + + return NULL; +} + +static void send_hsr_supervision_frame(struct hsr_port *master, + u8 type, u8 hsr_ver) +{ + struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tag *hsr_stag; + struct hsr_tag *hsr_tag; + unsigned long irqflags; + struct sk_buff *skb; + + skb = hsr_init_skb(master, hsr_ver); + if (!skb) { + WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + return; + } + if (hsr_ver > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); hsr_tag->encap_proto = htons(ETH_P_PRP); @@ -299,11 +317,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, return; hsr_forward_skb(skb, master); - return; -out: - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); - kfree_skb(skb); + return; } /* Announce (supervision frame) timer function -- cgit From 28e458e097f32af4f89f02e078a6d6b4ea7a52ed Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:18 -0400 Subject: net: hsr: introduce protocol specific function pointers As a preparatory patch to introduce support for PRP protocol, add a protocol ops ptr in the private hsr structure to hold function pointers as some of the functions at protocol level packet handling is different for HSR vs PRP. It is expected that PRP will add its of set of functions for protocol handling. Modify existing hsr_announce() function to call proto_ops->send_sv_frame() to send supervision frame for HSR. This is expected to be different for PRP. So introduce a ops function ptr, send_sv_frame() for the same and initialize it to send_hsr_supervsion_frame(). Modify hsr_announce() to call proto_ops->send_sv_frame(). Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 70 +++++++++++++++++++++++++++++----------------------- net/hsr/hsr_main.h | 6 +++++ 2 files changed, 45 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index bfe2d1449dbd..006e715eccb6 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -230,7 +230,7 @@ static const struct header_ops hsr_header_ops = { .parse = eth_header_parse, }; -static struct sk_buff *hsr_init_skb(struct hsr_port *master, u8 hsr_ver) +static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto) { struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; @@ -247,10 +247,10 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u8 hsr_ver) skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(hsr_ver ? ETH_P_HSR : ETH_P_PRP); + skb->protocol = htons(proto); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, (hsr_ver ? ETH_P_HSR : ETH_P_PRP), + if (dev_hard_header(skb, skb->dev, proto, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; @@ -267,47 +267,62 @@ out: } static void send_hsr_supervision_frame(struct hsr_port *master, - u8 type, u8 hsr_ver) + unsigned long *interval) { + struct hsr_priv *hsr = master->hsr; + __u8 type = HSR_TLV_LIFE_CHECK; + struct hsr_tag *hsr_tag = NULL; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; - struct hsr_tag *hsr_tag; unsigned long irqflags; struct sk_buff *skb; + u16 proto; + + *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + if (hsr->announce_count < 3 && hsr->prot_version == 0) { + type = HSR_TLV_ANNOUNCE; + *interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + hsr->announce_count++; + } + + if (!hsr->prot_version) + proto = ETH_P_PRP; + else + proto = ETH_P_HSR; - skb = hsr_init_skb(master, hsr_ver); + skb = hsr_init_skb(master, proto); if (!skb) { WARN_ONCE(1, "HSR: Could not send supervision frame\n"); return; } - if (hsr_ver > 0) { + if (hsr->prot_version > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); hsr_tag->encap_proto = htons(ETH_P_PRP); set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); - set_hsr_stag_path(hsr_stag, (hsr_ver ? 0x0 : 0xf)); - set_hsr_stag_HSR_ver(hsr_stag, hsr_ver); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); + set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); /* From HSRv1 on we have separate supervision sequence numbers. */ spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); - if (hsr_ver > 0) { - hsr_stag->sequence_nr = htons(master->hsr->sup_sequence_nr); - hsr_tag->sequence_nr = htons(master->hsr->sequence_nr); - master->hsr->sup_sequence_nr++; - master->hsr->sequence_nr++; + if (hsr->prot_version > 0) { + hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); + hsr->sup_sequence_nr++; + hsr_tag->sequence_nr = htons(hsr->sequence_nr); + hsr->sequence_nr++; } else { - hsr_stag->sequence_nr = htons(master->hsr->sequence_nr); - master->hsr->sequence_nr++; + hsr_stag->sequence_nr = htons(hsr->sequence_nr); + hsr->sequence_nr++; } spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_stag->HSR_TLV_type = type; /* TODO: Why 12 in HSRv0? */ - hsr_stag->HSR_TLV_length = - hsr_ver ? sizeof(struct hsr_sup_payload) : 12; + hsr_stag->HSR_TLV_length = hsr->prot_version ? + sizeof(struct hsr_sup_payload) : 12; /* Payload: MacAddressA */ hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); @@ -333,19 +348,7 @@ static void hsr_announce(struct timer_list *t) rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - - if (hsr->announce_count < 3 && hsr->prot_version == 0) { - send_hsr_supervision_frame(master, HSR_TLV_ANNOUNCE, - hsr->prot_version); - hsr->announce_count++; - - interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - } else { - send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, - hsr->prot_version); - - interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); - } + hsr->proto_ops->send_sv_frame(master, &interval); if (is_admin_up(master->dev)) mod_timer(&hsr->announce_timer, jiffies + interval); @@ -382,6 +385,10 @@ static struct device_type hsr_type = { .name = "hsr", }; +static struct hsr_proto_ops hsr_ops = { + .send_sv_frame = send_hsr_supervision_frame, +}; + void hsr_dev_setup(struct net_device *dev) { eth_hw_addr_random(dev); @@ -445,6 +452,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (protocol_version == PRP_V1) return -EPROTONOSUPPORT; + hsr->proto_ops = &hsr_ops; /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 8cf10d67d5f9..671270115a50 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -140,6 +140,11 @@ enum hsr_version { PRP_V1, }; +struct hsr_proto_ops { + /* format and send supervision frame */ + void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval); +}; + struct hsr_priv { struct rcu_head rcu_head; struct list_head ports; @@ -153,6 +158,7 @@ struct hsr_priv { enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ + struct hsr_proto_ops *proto_ops; unsigned char sup_multicast_addr[ETH_ALEN]; #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; -- cgit From c643ff0383c858b9af09f582ed2947810e4cf407 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:19 -0400 Subject: net: prp: add supervision frame generation utility function Add support for generation of PRP supervision frames. For PRP, supervision frame format is similar to HSR version 0, but have a PRP Redundancy Control Trailer (RCT) added and uses a different message type, PRP_TLV_LIFE_CHECK_DD. Also update is_supervision_frame() to include the new message type used for PRP supervision frame. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++- net/hsr/hsr_forward.c | 4 +++- net/hsr/hsr_main.h | 22 ++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 006e715eccb6..74eaf28743a4 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -238,6 +238,10 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto) hlen = LL_RESERVED_SPACE(master->dev); tlen = master->dev->needed_tailroom; + /* skb size is same for PRP/HSR frames, only difference + * being, for PRP it is a trailer and for HSR it is a + * header + */ skb = dev_alloc_skb(sizeof(struct hsr_tag) + sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload) + hlen + tlen); @@ -336,6 +340,55 @@ static void send_hsr_supervision_frame(struct hsr_port *master, return; } +static void send_prp_supervision_frame(struct hsr_port *master, + unsigned long *interval) +{ + struct hsr_priv *hsr = master->hsr; + struct hsr_sup_payload *hsr_sp; + struct hsr_sup_tag *hsr_stag; + unsigned long irqflags; + struct sk_buff *skb; + struct prp_rct *rct; + u8 *tail; + + skb = hsr_init_skb(master, ETH_P_PRP); + if (!skb) { + WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + return; + } + + *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); + set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0)); + + /* From HSRv1 on we have separate supervision sequence numbers. */ + spin_lock_irqsave(&master->hsr->seqnr_lock, irqflags); + hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); + hsr->sup_sequence_nr++; + hsr_stag->HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD; + hsr_stag->HSR_TLV_length = sizeof(struct hsr_sup_payload); + + /* Payload: MacAddressA */ + hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); + ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); + + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) { + spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); + return; + } + + tail = skb_tail_pointer(skb) - HSR_HLEN; + rct = (struct prp_rct *)tail; + rct->PRP_suffix = htons(ETH_P_PRP); + set_prp_LSDU_size(rct, HSR_V1_SUP_LSDUSIZE); + rct->sequence_nr = htons(hsr->sequence_nr); + hsr->sequence_nr++; + spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); + + hsr_forward_skb(skb, master); +} + /* Announce (supervision frame) timer function */ static void hsr_announce(struct timer_list *t) @@ -389,6 +442,10 @@ static struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, }; +struct hsr_proto_ops prp_ops = { + .send_sv_frame = send_prp_supervision_frame, +}; + void hsr_dev_setup(struct net_device *dev) { eth_hw_addr_random(dev); @@ -452,7 +509,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], if (protocol_version == PRP_V1) return -EPROTONOSUPPORT; - hsr->proto_ops = &hsr_ops; + /* initialize protocol specific functions */ + if (protocol_version == PRP_V1) + hsr->proto_ops = &prp_ops; + else + hsr->proto_ops = &hsr_ops; + /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 55adb4dbd235..dedf8ac6f992 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -76,7 +76,9 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) } if (hsr_sup_tag->HSR_TLV_type != HSR_TLV_ANNOUNCE && - hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK) + hsr_sup_tag->HSR_TLV_type != HSR_TLV_LIFE_CHECK && + hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DD && + hsr_sup_tag->HSR_TLV_type != PRP_TLV_LIFE_CHECK_DA) return false; if (hsr_sup_tag->HSR_TLV_length != 12 && hsr_sup_tag->HSR_TLV_length != sizeof(struct hsr_sup_payload)) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 671270115a50..58e1ad21b66f 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -35,6 +35,10 @@ #define HSR_TLV_ANNOUNCE 22 #define HSR_TLV_LIFE_CHECK 23 +/* PRP V1 life check for Duplicate discard */ +#define PRP_TLV_LIFE_CHECK_DD 20 +/* PRP V1 life check for Duplicate Accept */ +#define PRP_TLV_LIFE_CHECK_DA 21 /* HSR Tag. * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, @@ -126,6 +130,24 @@ enum hsr_port_type { HSR_PT_PORTS, /* This must be the last item in the enum */ }; +/* PRP Redunancy Control Trailor (RCT). + * As defined in IEC-62439-4:2012, the PRP RCT is really { sequence Nr, + * Lan indentifier (LanId), LSDU_size and PRP_suffix = 0x88FB }. + * + * Field names as defined in the IEC:2012 standard for PRP. + */ +struct prp_rct { + __be16 sequence_nr; + __be16 lan_id_and_LSDU_size; + __be16 PRP_suffix; +} __packed; + +static inline void set_prp_LSDU_size(struct prp_rct *rct, u16 LSDU_size) +{ + rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) & + 0xF000) | (LSDU_size & 0x0FFF)); +} + struct hsr_port { struct list_head port_list; struct net_device *dev; -- cgit From fa4dc89531360de760359bf94086b04dada98d4e Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:20 -0400 Subject: net: hsr: define and use proto_ops ptrs to handle hsr specific frames As a preparatory patch to introduce PRP, refactor the code specific to handling HSR frames into separate functions and call them through proto_ops function pointers. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 5 +++- net/hsr/hsr_forward.c | 63 ++++++++++++++++++++++++++++++--------------------- net/hsr/hsr_forward.h | 7 +++++- net/hsr/hsr_main.h | 8 +++++++ 4 files changed, 55 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 74eaf28743a4..022393bed40a 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -440,9 +440,12 @@ static struct device_type hsr_type = { static struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, + .create_tagged_frame = hsr_create_tagged_frame, + .get_untagged_frame = hsr_get_untagged_frame, + .fill_frame_info = hsr_fill_frame_info, }; -struct hsr_proto_ops prp_ops = { +static struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, }; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index dedf8ac6f992..33e8136891bc 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -116,8 +116,8 @@ static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, return skb; } -static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, - struct hsr_port *port) +struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port) { if (!frame->skb_std) frame->skb_std = create_stripped_skb(frame->skb_hsr, frame); @@ -192,8 +192,8 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, /* If the original frame was an HSR tagged frame, just clone it to be sent * unchanged. Otherwise, create a private frame especially tagged for 'port'. */ -static struct sk_buff *frame_get_tagged_skb(struct hsr_frame_info *frame, - struct hsr_port *port) +struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port) { if (frame->skb_hsr) return skb_clone(frame->skb_hsr, GFP_ATOMIC); @@ -257,6 +257,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) struct sk_buff *skb; hsr_for_each_port(frame->port_rcv->hsr, port) { + struct hsr_priv *hsr = port->hsr; /* Don't send frame back the way it came */ if (port == frame->port_rcv) continue; @@ -282,9 +283,10 @@ static void hsr_forward_do(struct hsr_frame_info *frame) } if (port->type != HSR_PT_MASTER) - skb = frame_get_tagged_skb(frame, port); + skb = hsr->proto_ops->create_tagged_frame(frame, port); else - skb = frame_get_stripped_skb(frame, port); + skb = hsr->proto_ops->get_untagged_frame(frame, port); + if (!skb) { /* FIXME: Record the dropped frame? */ continue; @@ -317,12 +319,34 @@ static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, } } -static int hsr_fill_frame_info(struct hsr_frame_info *frame, - struct sk_buff *skb, struct hsr_port *port) +void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame) { - struct ethhdr *ethhdr; + struct hsr_priv *hsr = frame->port_rcv->hsr; unsigned long irqflags; + if (proto == htons(ETH_P_PRP) || proto == htons(ETH_P_HSR)) { + frame->skb_std = NULL; + frame->skb_hsr = skb; + frame->sequence_nr = hsr_get_skb_sequence_nr(skb); + } else { + frame->skb_std = skb; + frame->skb_hsr = NULL; + /* Sequence nr for the master node */ + spin_lock_irqsave(&hsr->seqnr_lock, irqflags); + frame->sequence_nr = hsr->sequence_nr; + hsr->sequence_nr++; + spin_unlock_irqrestore(&hsr->seqnr_lock, irqflags); + } +} + +static int fill_frame_info(struct hsr_frame_info *frame, + struct sk_buff *skb, struct hsr_port *port) +{ + struct hsr_priv *hsr = port->hsr; + struct ethhdr *ethhdr; + __be16 proto; + frame->is_supervision = is_supervision_frame(port->hsr, skb); frame->node_src = hsr_get_node(port, skb, frame->is_supervision); if (!frame->node_src) @@ -335,23 +359,10 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame, /* FIXME: */ netdev_warn_once(skb->dev, "VLAN not yet supported"); } - if (ethhdr->h_proto == htons(ETH_P_PRP) || - ethhdr->h_proto == htons(ETH_P_HSR)) { - frame->skb_std = NULL; - frame->skb_hsr = skb; - frame->sequence_nr = hsr_get_skb_sequence_nr(skb); - } else { - frame->skb_std = skb; - frame->skb_hsr = NULL; - /* Sequence nr for the master node */ - spin_lock_irqsave(&port->hsr->seqnr_lock, irqflags); - frame->sequence_nr = port->hsr->sequence_nr; - port->hsr->sequence_nr++; - spin_unlock_irqrestore(&port->hsr->seqnr_lock, irqflags); - } - + proto = ethhdr->h_proto; frame->port_rcv = port; - check_local_dest(port->hsr, skb, frame); + hsr->proto_ops->fill_frame_info(proto, skb, frame); + check_local_dest(hsr, skb, frame); return 0; } @@ -367,7 +378,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) goto out_drop; } - if (hsr_fill_frame_info(&frame, skb, port) < 0) + if (fill_frame_info(&frame, skb, port) < 0) goto out_drop; hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); hsr_forward_do(&frame); diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index b2a6fa319d94..893207792d56 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -14,5 +14,10 @@ #include "hsr_main.h" void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port); - +struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port); +struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port); +void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); #endif /* __HSR_FORWARD_H */ diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 58e1ad21b66f..14f442c57a84 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -162,9 +162,17 @@ enum hsr_version { PRP_V1, }; +struct hsr_frame_info; + struct hsr_proto_ops { /* format and send supervision frame */ void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval); + struct sk_buff * (*get_untagged_frame)(struct hsr_frame_info *frame, + struct hsr_port *port); + struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame, + struct hsr_port *port); + void (*fill_frame_info)(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); }; struct hsr_priv { -- cgit From 451d8123f89791bb628277c0bdb4cae34a3563e6 Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:21 -0400 Subject: net: prp: add packet handling support DAN-P (Dual Attached Nodes PRP) nodes are expected to receive traditional IP packets as well as PRP (Parallel Redundancy Protocol) tagged (trailer) packets. PRP trailer is 6 bytes of PRP protocol unit called RCT, Redundancy Control Trailer (RCT) similar to HSR tag. PRP network can have traditional devices such as bridges/switches or PC attached to it and should be able to communicate. Regular Ethernet devices treat the RCT as pads. This patch adds logic to format L2 frames from network stack to add a trailer (RCT) and send it as duplicates over the slave interfaces when the protocol is PRP as per IEC 62439-3. At the ingress, it strips the trailer, do duplicate detection and rejection and forward a stripped frame up the network stack. PRP device should accept frames from Singly Attached Nodes (SAN) and thus the driver mark the link where the frame came from in the node table. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 20 ++-- net/hsr/hsr_forward.c | 272 ++++++++++++++++++++++++++++++++++++++----------- net/hsr/hsr_forward.h | 7 ++ net/hsr/hsr_framereg.c | 94 +++++++++++++---- net/hsr/hsr_framereg.h | 29 +++++- net/hsr/hsr_main.h | 73 ++++++++++++- net/hsr/hsr_slave.c | 24 ++++- net/hsr/hsr_slave.h | 2 + 8 files changed, 433 insertions(+), 88 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 022393bed40a..ab953a1a0d6c 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -443,10 +443,17 @@ static struct hsr_proto_ops hsr_ops = { .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, .fill_frame_info = hsr_fill_frame_info, + .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, }; static struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, + .create_tagged_frame = prp_create_tagged_frame, + .get_untagged_frame = prp_get_untagged_frame, + .drop_frame = prp_drop_frame, + .fill_frame_info = prp_fill_frame_info, + .handle_san_frame = prp_handle_san_frame, + .update_san_info = prp_update_san_info, }; void hsr_dev_setup(struct net_device *dev) @@ -508,15 +515,16 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr); - /* currently PRP is not supported */ - if (protocol_version == PRP_V1) - return -EPROTONOSUPPORT; - /* initialize protocol specific functions */ - if (protocol_version == PRP_V1) + if (protocol_version == PRP_V1) { + /* For PRP, lan_id has most significant 3 bits holding + * the net_id of PRP_LAN_ID + */ + hsr->net_id = PRP_LAN_ID << 1; hsr->proto_ops = &prp_ops; - else + } else { hsr->proto_ops = &hsr_ops; + } /* Make sure we recognize frames from ourselves in hsr_rcv() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 33e8136891bc..cadfccd7876e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -17,18 +17,6 @@ struct hsr_node; -struct hsr_frame_info { - struct sk_buff *skb_std; - struct sk_buff *skb_hsr; - struct hsr_port *port_rcv; - struct hsr_node *node_src; - u16 sequence_nr; - bool is_supervision; - bool is_vlan; - bool is_local_dest; - bool is_local_exclusive; -}; - /* The uses I can see for these HSR supervision frames are: * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type = * 22") to reset any sequence_nr counters belonging to that node. Useful if @@ -87,8 +75,8 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) return true; } -static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, - struct hsr_frame_info *frame) +static struct sk_buff *create_stripped_skb_hsr(struct sk_buff *skb_in, + struct hsr_frame_info *frame) { struct sk_buff *skb; int copylen; @@ -119,35 +107,120 @@ static struct sk_buff *create_stripped_skb(struct sk_buff *skb_in, struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { - if (!frame->skb_std) - frame->skb_std = create_stripped_skb(frame->skb_hsr, frame); + if (!frame->skb_std) { + if (frame->skb_hsr) { + frame->skb_std = + create_stripped_skb_hsr(frame->skb_hsr, frame); + } else { + /* Unexpected */ + WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", + __FILE__, __LINE__, port->dev->name); + return NULL; + } + } + return skb_clone(frame->skb_std, GFP_ATOMIC); } +struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port) +{ + if (!frame->skb_std) { + if (frame->skb_prp) { + /* trim the skb by len - HSR_HLEN to exclude RCT */ + skb_trim(frame->skb_prp, + frame->skb_prp->len - HSR_HLEN); + frame->skb_std = + __pskb_copy(frame->skb_prp, + skb_headroom(frame->skb_prp), + GFP_ATOMIC); + } else { + /* Unexpected */ + WARN_ONCE(1, "%s:%d: Unexpected frame received (port_src %s)\n", + __FILE__, __LINE__, port->dev->name); + return NULL; + } + } + + return skb_clone(frame->skb_std, GFP_ATOMIC); +} + +static void prp_set_lan_id(struct prp_rct *trailer, + struct hsr_port *port) +{ + int lane_id; + + if (port->type == HSR_PT_SLAVE_A) + lane_id = 0; + else + lane_id = 1; + + /* Add net_id in the upper 3 bits of lane_id */ + lane_id |= port->hsr->net_id; + set_prp_lan_id(trailer, lane_id); +} + +/* Tailroom for PRP rct should have been created before calling this */ +static struct sk_buff *prp_fill_rct(struct sk_buff *skb, + struct hsr_frame_info *frame, + struct hsr_port *port) +{ + struct prp_rct *trailer; + int min_size = ETH_ZLEN; + int lsdu_size; + + if (!skb) + return skb; + + if (frame->is_vlan) + min_size = VLAN_ETH_ZLEN; + + if (skb_put_padto(skb, min_size)) + return NULL; + + trailer = (struct prp_rct *)skb_put(skb, HSR_HLEN); + lsdu_size = skb->len - 14; + if (frame->is_vlan) + lsdu_size -= 4; + prp_set_lan_id(trailer, port); + set_prp_LSDU_size(trailer, lsdu_size); + trailer->sequence_nr = htons(frame->sequence_nr); + trailer->PRP_suffix = htons(ETH_P_PRP); + + return skb; +} + +static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, + struct hsr_port *port) +{ + int path_id; + + if (port->type == HSR_PT_SLAVE_A) + path_id = 0; + else + path_id = 1; + + set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); +} + static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; - int lane_id; int lsdu_size; /* pad to minimum packet size which is 60 + 6 (HSR tag) */ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) return NULL; - if (port->type == HSR_PT_SLAVE_A) - lane_id = 0; - else - lane_id = 1; - lsdu_size = skb->len - 14; if (frame->is_vlan) lsdu_size -= 4; hsr_ethhdr = (struct hsr_ethhdr *)skb_mac_header(skb); - set_hsr_tag_path(&hsr_ethhdr->hsr_tag, lane_id); + hsr_set_path_id(hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; @@ -157,16 +230,28 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, return skb; } -static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, - struct hsr_frame_info *frame, - struct hsr_port *port) +/* If the original frame was an HSR tagged frame, just clone it to be sent + * unchanged. Otherwise, create a private frame especially tagged for 'port'. + */ +struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port) { - int movelen; unsigned char *dst, *src; struct sk_buff *skb; + int movelen; + + if (frame->skb_hsr) { + struct hsr_ethhdr *hsr_ethhdr = + (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); + + /* set the lane id properly */ + hsr_set_path_id(hsr_ethhdr, port); + return skb_clone(frame->skb_hsr, GFP_ATOMIC); + } /* Create the new skb with enough headroom to fit the HSR tag */ - skb = __pskb_copy(skb_o, skb_headroom(skb_o) + HSR_HLEN, GFP_ATOMIC); + skb = __pskb_copy(frame->skb_std, + skb_headroom(frame->skb_std) + HSR_HLEN, GFP_ATOMIC); if (!skb) return NULL; skb_reset_mac_header(skb); @@ -189,21 +274,29 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, return hsr_fill_tag(skb, frame, port, port->hsr->prot_version); } -/* If the original frame was an HSR tagged frame, just clone it to be sent - * unchanged. Otherwise, create a private frame especially tagged for 'port'. - */ -struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, +struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, struct hsr_port *port) { - if (frame->skb_hsr) - return skb_clone(frame->skb_hsr, GFP_ATOMIC); + struct sk_buff *skb; - if (port->type != HSR_PT_SLAVE_A && port->type != HSR_PT_SLAVE_B) { - WARN_ONCE(1, "HSR: Bug: trying to create a tagged frame for a non-ring port"); - return NULL; + if (frame->skb_prp) { + struct prp_rct *trailer = skb_get_PRP_rct(frame->skb_prp); + + if (trailer) { + prp_set_lan_id(trailer, port); + } else { + WARN_ONCE(!trailer, "errored PRP skb"); + return NULL; + } + return skb_clone(frame->skb_prp, GFP_ATOMIC); } - return create_tagged_skb(frame->skb_std, frame, port); + skb = skb_copy_expand(frame->skb_std, 0, + skb_tailroom(frame->skb_std) + HSR_HLEN, + GFP_ATOMIC); + prp_fill_rct(skb, frame, port); + + return skb; } static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, @@ -240,9 +333,18 @@ static int hsr_xmit(struct sk_buff *skb, struct hsr_port *port, return dev_queue_xmit(skb); } +bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) +{ + return ((frame->port_rcv->type == HSR_PT_SLAVE_A && + port->type == HSR_PT_SLAVE_B) || + (frame->port_rcv->type == HSR_PT_SLAVE_B && + port->type == HSR_PT_SLAVE_A)); +} + /* Forward the frame through all devices except: * - Back through the receiving device * - If it's a HSR frame: through a device where it has passed before + * - if it's a PRP frame: through another PRP slave device (no bridge) * - To the local HSR master only if the frame is directly addressed to it, or * a non-supervision multicast or broadcast frame. * @@ -270,25 +372,33 @@ static void hsr_forward_do(struct hsr_frame_info *frame) if (port->type != HSR_PT_MASTER && frame->is_local_exclusive) continue; - /* Don't send frame over port where it has been sent before */ - if (hsr_register_frame_out(port, frame->node_src, + /* Don't send frame over port where it has been sent before. + * Also fro SAN, this shouldn't be done. + */ + if (!frame->is_from_san && + hsr_register_frame_out(port, frame->node_src, frame->sequence_nr)) continue; if (frame->is_supervision && port->type == HSR_PT_MASTER) { - hsr_handle_sup_frame(frame->skb_hsr, - frame->node_src, - frame->port_rcv); + hsr_handle_sup_frame(frame); continue; } + /* Check if frame is to be dropped. Eg. for PRP no forward + * between ports. + */ + if (hsr->proto_ops->drop_frame && + hsr->proto_ops->drop_frame(frame, port)) + continue; + if (port->type != HSR_PT_MASTER) skb = hsr->proto_ops->create_tagged_frame(frame, port); else skb = hsr->proto_ops->get_untagged_frame(frame, port); if (!skb) { - /* FIXME: Record the dropped frame? */ + frame->port_rcv->dev->stats.rx_dropped++; continue; } @@ -319,19 +429,20 @@ static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, } } -void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, - struct hsr_frame_info *frame) +static void handle_std_frame(struct sk_buff *skb, + struct hsr_frame_info *frame) { - struct hsr_priv *hsr = frame->port_rcv->hsr; + struct hsr_port *port = frame->port_rcv; + struct hsr_priv *hsr = port->hsr; unsigned long irqflags; - if (proto == htons(ETH_P_PRP) || proto == htons(ETH_P_HSR)) { - frame->skb_std = NULL; - frame->skb_hsr = skb; - frame->sequence_nr = hsr_get_skb_sequence_nr(skb); + frame->skb_hsr = NULL; + frame->skb_prp = NULL; + frame->skb_std = skb; + + if (port->type != HSR_PT_MASTER) { + frame->is_from_san = true; } else { - frame->skb_std = skb; - frame->skb_hsr = NULL; /* Sequence nr for the master node */ spin_lock_irqsave(&hsr->seqnr_lock, irqflags); frame->sequence_nr = hsr->sequence_nr; @@ -340,29 +451,74 @@ void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, } } +void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame) +{ + if (proto == htons(ETH_P_PRP) || + proto == htons(ETH_P_HSR)) { + /* HSR tagged frame :- Data or Supervision */ + frame->skb_std = NULL; + frame->skb_prp = NULL; + frame->skb_hsr = skb; + frame->sequence_nr = hsr_get_skb_sequence_nr(skb); + return; + } + + /* Standard frame or PRP from master port */ + handle_std_frame(skb, frame); +} + +void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame) +{ + /* Supervision frame */ + struct prp_rct *rct = skb_get_PRP_rct(skb); + + if (rct && + prp_check_lsdu_size(skb, rct, frame->is_supervision)) { + frame->skb_hsr = NULL; + frame->skb_std = NULL; + frame->skb_prp = skb; + frame->sequence_nr = prp_get_skb_sequence_nr(rct); + return; + } + handle_std_frame(skb, frame); +} + static int fill_frame_info(struct hsr_frame_info *frame, struct sk_buff *skb, struct hsr_port *port) { struct hsr_priv *hsr = port->hsr; + struct hsr_vlan_ethhdr *vlan_hdr; struct ethhdr *ethhdr; __be16 proto; + memset(frame, 0, sizeof(*frame)); frame->is_supervision = is_supervision_frame(port->hsr, skb); - frame->node_src = hsr_get_node(port, skb, frame->is_supervision); + frame->node_src = hsr_get_node(port, &hsr->node_db, skb, + frame->is_supervision, + port->type); if (!frame->node_src) return -1; /* Unknown node and !is_supervision, or no mem */ ethhdr = (struct ethhdr *)skb_mac_header(skb); frame->is_vlan = false; - if (ethhdr->h_proto == htons(ETH_P_8021Q)) { + proto = ethhdr->h_proto; + + if (proto == htons(ETH_P_8021Q)) frame->is_vlan = true; + + if (frame->is_vlan) { + vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr; + proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; /* FIXME: */ netdev_warn_once(skb->dev, "VLAN not yet supported"); } - proto = ethhdr->h_proto; + + frame->is_from_san = false; frame->port_rcv = port; hsr->proto_ops->fill_frame_info(proto, skb, frame); - check_local_dest(hsr, skb, frame); + check_local_dest(port->hsr, skb, frame); return 0; } @@ -380,6 +536,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) if (fill_frame_info(&frame, skb, port) < 0) goto out_drop; + hsr_register_frame_in(frame.node_src, port, frame.sequence_nr); hsr_forward_do(&frame); /* Gets called for ingress frames as well as egress from master port. @@ -391,6 +548,7 @@ void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port) } kfree_skb(frame.skb_hsr); + kfree_skb(frame.skb_prp); kfree_skb(frame.skb_std); return; diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index 893207792d56..618140d484ad 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -14,10 +14,17 @@ #include "hsr_main.h" void hsr_forward_skb(struct sk_buff *skb, struct hsr_port *port); +struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port); struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, struct hsr_port *port); struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port); +struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, + struct hsr_port *port); +bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port); +void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, + struct hsr_frame_info *frame); void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame); #endif /* __HSR_FORWARD_H */ diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 13b2190e6556..5c97de459905 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -127,6 +127,19 @@ void hsr_del_nodes(struct list_head *node_db) kfree(node); } +void prp_handle_san_frame(bool san, enum hsr_port_type port, + struct hsr_node *node) +{ + /* Mark if the SAN node is over LAN_A or LAN_B */ + if (port == HSR_PT_SLAVE_A) { + node->san_a = true; + return; + } + + if (port == HSR_PT_SLAVE_B) + node->san_b = true; +} + /* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A; * seq_out is used to initialize filtering of outgoing duplicate frames * originating from the newly added node. @@ -134,7 +147,8 @@ void hsr_del_nodes(struct list_head *node_db) static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, struct list_head *node_db, unsigned char addr[], - u16 seq_out) + u16 seq_out, bool san, + enum hsr_port_type rx_port) { struct hsr_node *new_node, *node; unsigned long now; @@ -155,6 +169,9 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, for (i = 0; i < HSR_PT_PORTS; i++) new_node->seq_out[i] = seq_out; + if (san && hsr->proto_ops->handle_san_frame) + hsr->proto_ops->handle_san_frame(san, rx_port, new_node); + spin_lock_bh(&hsr->list_lock); list_for_each_entry_rcu(node, node_db, mac_list, lockdep_is_held(&hsr->list_lock)) { @@ -172,15 +189,26 @@ out: return node; } +void prp_update_san_info(struct hsr_node *node, bool is_sup) +{ + if (!is_sup) + return; + + node->san_a = false; + node->san_b = false; +} + /* Get the hsr_node from which 'skb' was sent. */ -struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, - bool is_sup) +struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, + struct sk_buff *skb, bool is_sup, + enum hsr_port_type rx_port) { - struct list_head *node_db = &port->hsr->node_db; struct hsr_priv *hsr = port->hsr; struct hsr_node *node; struct ethhdr *ethhdr; + struct prp_rct *rct; + bool san = false; u16 seq_out; if (!skb_mac_header_was_set(skb)) @@ -189,14 +217,21 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, ethhdr = (struct ethhdr *)skb_mac_header(skb); list_for_each_entry_rcu(node, node_db, mac_list) { - if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) + if (ether_addr_equal(node->macaddress_A, ethhdr->h_source)) { + if (hsr->proto_ops->update_san_info) + hsr->proto_ops->update_san_info(node, is_sup); return node; - if (ether_addr_equal(node->macaddress_B, ethhdr->h_source)) + } + if (ether_addr_equal(node->macaddress_B, ethhdr->h_source)) { + if (hsr->proto_ops->update_san_info) + hsr->proto_ops->update_san_info(node, is_sup); return node; + } } - /* Everyone may create a node entry, connected node to a HSR device. */ - + /* Everyone may create a node entry, connected node to a HSR/PRP + * device. + */ if (ethhdr->h_proto == htons(ETH_P_PRP) || ethhdr->h_proto == htons(ETH_P_HSR)) { /* Use the existing sequence_nr from the tag as starting point @@ -204,31 +239,47 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, */ seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { - /* this is called also for frames from master port and - * so warn only for non master ports - */ - if (port->type != HSR_PT_MASTER) - WARN_ONCE(1, "%s: Non-HSR frame\n", __func__); - seq_out = HSR_SEQNR_START; + rct = skb_get_PRP_rct(skb); + if (rct && prp_check_lsdu_size(skb, rct, is_sup)) { + seq_out = prp_get_skb_sequence_nr(rct); + } else { + if (rx_port != HSR_PT_MASTER) + san = true; + seq_out = HSR_SEQNR_START; + } } - return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out); + return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out, + san, rx_port); } /* Use the Supervision frame's info about an eventual macaddress_B for merging * nodes that has previously had their macaddress_B registered as a separate * node. */ -void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, - struct hsr_port *port_rcv) +void hsr_handle_sup_frame(struct hsr_frame_info *frame) { + struct hsr_node *node_curr = frame->node_src; + struct hsr_port *port_rcv = frame->port_rcv; struct hsr_priv *hsr = port_rcv->hsr; struct hsr_sup_payload *hsr_sp; struct hsr_node *node_real; + struct sk_buff *skb = NULL; struct list_head *node_db; struct ethhdr *ethhdr; int i; + /* Here either frame->skb_hsr or frame->skb_prp should be + * valid as supervision frame always will have protocol + * header info. + */ + if (frame->skb_hsr) + skb = frame->skb_hsr; + else if (frame->skb_prp) + skb = frame->skb_prp; + if (!skb) + return; + ethhdr = (struct ethhdr *)skb_mac_header(skb); /* Leave the ethernet header. */ @@ -249,7 +300,8 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, if (!node_real) /* No frame received from AddrA of this node yet */ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, - HSR_SEQNR_START - 1); + HSR_SEQNR_START - 1, true, + port_rcv->type); if (!node_real) goto done; /* No mem */ if (node_real == node_curr) @@ -275,7 +327,11 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, kfree_rcu(node_curr, rcu_head); done: - skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); + /* PRP uses v0 header */ + if (ethhdr->h_proto == htons(ETH_P_HSR)) + skb_push(skb, sizeof(struct hsrv1_ethhdr_sp)); + else + skb_push(skb, sizeof(struct hsrv0_ethhdr_sp)); } /* 'skb' is a frame meant for this host, that is to be passed to upper layers. diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index c06447780d05..86b43f539f2c 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -14,12 +14,26 @@ struct hsr_node; +struct hsr_frame_info { + struct sk_buff *skb_std; + struct sk_buff *skb_hsr; + struct sk_buff *skb_prp; + struct hsr_port *port_rcv; + struct hsr_node *node_src; + u16 sequence_nr; + bool is_supervision; + bool is_vlan; + bool is_local_dest; + bool is_local_exclusive; + bool is_from_san; +}; + void hsr_del_self_node(struct hsr_priv *hsr); void hsr_del_nodes(struct list_head *node_db); -struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, - bool is_sup); -void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr, - struct hsr_port *port); +struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, + struct sk_buff *skb, bool is_sup, + enum hsr_port_type rx_port); +void hsr_handle_sup_frame(struct hsr_frame_info *frame); bool hsr_addr_is_self(struct hsr_priv *hsr, unsigned char *addr); void hsr_addr_subst_source(struct hsr_node *node, struct sk_buff *skb); @@ -49,6 +63,10 @@ int hsr_get_node_data(struct hsr_priv *hsr, int *if2_age, u16 *if2_seq); +void prp_handle_san_frame(bool san, enum hsr_port_type port, + struct hsr_node *node); +void prp_update_san_info(struct hsr_node *node, bool is_sup); + struct hsr_node { struct list_head mac_list; unsigned char macaddress_A[ETH_ALEN]; @@ -57,6 +75,9 @@ struct hsr_node { enum hsr_port_type addr_B_port; unsigned long time_in[HSR_PT_PORTS]; bool time_in_stale[HSR_PT_PORTS]; + /* if the node is a SAN */ + bool san_a; + bool san_b; u16 seq_out[HSR_PT_PORTS]; struct rcu_head rcu_head; }; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 14f442c57a84..7dc92ce5a134 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -12,6 +12,7 @@ #include #include +#include /* Time constants as specified in the HSR specification (IEC-62439-3 2010) * Table 8. @@ -86,7 +87,12 @@ struct hsr_ethhdr { struct hsr_tag hsr_tag; } __packed; -/* HSR Supervision Frame data types. +struct hsr_vlan_ethhdr { + struct vlan_ethhdr vlanhdr; + struct hsr_tag hsr_tag; +} __packed; + +/* HSR/PRP Supervision Frame data types. * Field names as defined in the IEC:2010 standard for HSR. */ struct hsr_sup_tag { @@ -142,6 +148,16 @@ struct prp_rct { __be16 PRP_suffix; } __packed; +static inline u16 get_prp_LSDU_size(struct prp_rct *rct) +{ + return ntohs(rct->lan_id_and_LSDU_size) & 0x0FFF; +} + +static inline void set_prp_lan_id(struct prp_rct *rct, u16 lan_id) +{ + rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) & + 0x0FFF) | (lan_id << 12)); +} static inline void set_prp_LSDU_size(struct prp_rct *rct, u16 LSDU_size) { rct->lan_id_and_LSDU_size = htons((ntohs(rct->lan_id_and_LSDU_size) & @@ -163,16 +179,22 @@ enum hsr_version { }; struct hsr_frame_info; +struct hsr_node; struct hsr_proto_ops { /* format and send supervision frame */ void (*send_sv_frame)(struct hsr_port *port, unsigned long *interval); + void (*handle_san_frame)(bool san, enum hsr_port_type port, + struct hsr_node *node); + bool (*drop_frame)(struct hsr_frame_info *frame, struct hsr_port *port); struct sk_buff * (*get_untagged_frame)(struct hsr_frame_info *frame, struct hsr_port *port); struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame, struct hsr_port *port); void (*fill_frame_info)(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame); + bool (*invalid_dan_ingress_frame)(__be16 protocol); + void (*update_san_info)(struct hsr_node *node, bool is_sup); }; struct hsr_priv { @@ -189,6 +211,12 @@ struct hsr_priv { spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ struct hsr_proto_ops *proto_ops; +#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set + * based on SLAVE_A or SLAVE_B + */ + u8 net_id; /* for PRP, it occupies most significant 3 bits + * of lan_id + */ unsigned char sup_multicast_addr[ETH_ALEN]; #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; @@ -209,6 +237,49 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb) return ntohs(hsr_ethhdr->hsr_tag.sequence_nr); } +static inline struct prp_rct *skb_get_PRP_rct(struct sk_buff *skb) +{ + unsigned char *tail = skb_tail_pointer(skb) - HSR_HLEN; + + struct prp_rct *rct = (struct prp_rct *)tail; + + if (rct->PRP_suffix == htons(ETH_P_PRP)) + return rct; + + return NULL; +} + +/* Assume caller has confirmed this skb is PRP suffixed */ +static inline u16 prp_get_skb_sequence_nr(struct prp_rct *rct) +{ + return ntohs(rct->sequence_nr); +} + +static inline u16 get_prp_lan_id(struct prp_rct *rct) +{ + return ntohs(rct->lan_id_and_LSDU_size) >> 12; +} + +/* assume there is a valid rct */ +static inline bool prp_check_lsdu_size(struct sk_buff *skb, + struct prp_rct *rct, + bool is_sup) +{ + struct ethhdr *ethhdr; + int expected_lsdu_size; + + if (is_sup) { + expected_lsdu_size = HSR_V1_SUP_LSDUSIZE; + } else { + ethhdr = (struct ethhdr *)skb_mac_header(skb); + expected_lsdu_size = skb->len - 14; + if (ethhdr->h_proto == htons(ETH_P_8021Q)) + expected_lsdu_size -= 4; + } + + return (expected_lsdu_size == get_prp_LSDU_size(rct)); +} + #if IS_ENABLED(CONFIG_DEBUG_FS) void hsr_debugfs_rename(struct net_device *dev); void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev); diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index b5c0834de338..36d5fcf09c61 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -16,12 +16,22 @@ #include "hsr_forward.h" #include "hsr_framereg.h" +bool hsr_invalid_dan_ingress_frame(__be16 protocol) +{ + return (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR)); +} + static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; + struct hsr_priv *hsr; __be16 protocol; + /* Packets from dev_loopback_xmit() do not have L2 header, bail out */ + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; + if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); return RX_HANDLER_PASS; @@ -30,6 +40,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) port = hsr_port_get_rcu(skb->dev); if (!port) goto finish_pass; + hsr = port->hsr; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ @@ -37,12 +48,23 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_consume; } + /* For HSR, only tagged frames are expected, but for PRP + * there could be non tagged frames as well from Single + * attached nodes (SANs). + */ protocol = eth_hdr(skb)->h_proto; - if (protocol != htons(ETH_P_PRP) && protocol != htons(ETH_P_HSR)) + if (hsr->proto_ops->invalid_dan_ingress_frame && + hsr->proto_ops->invalid_dan_ingress_frame(protocol)) goto finish_pass; skb_push(skb, ETH_HLEN); + if (skb_mac_header(skb) != skb->data) { + WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n", + __func__, __LINE__, port->dev->name); + goto finish_consume; + } + hsr_forward_skb(skb, port); finish_consume: diff --git a/net/hsr/hsr_slave.h b/net/hsr/hsr_slave.h index 9708a4f0ec09..edc4612bb009 100644 --- a/net/hsr/hsr_slave.h +++ b/net/hsr/hsr_slave.h @@ -32,4 +32,6 @@ static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev) rcu_dereference(dev->rx_handler_data) : NULL; } +bool hsr_invalid_dan_ingress_frame(__be16 protocol); + #endif /* __HSR_SLAVE_H */ -- cgit From 795ec4f572509979442b9f2d269487dd58e3595c Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Wed, 22 Jul 2020 10:40:22 -0400 Subject: net: prp: enhance debugfs to display PRP info Print PRP specific information from node table as part of debugfs node table display. Also display the node as DAN-H or DAN-P depending on the info from node table. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- net/hsr/hsr_debugfs.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index c1932c0a15be..3b6f675bd55a 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -24,7 +24,7 @@ static struct dentry *hsr_debugfs_root_dir; static void print_mac_address(struct seq_file *sfp, unsigned char *mac) { - seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:", + seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x ", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } @@ -35,20 +35,32 @@ hsr_node_table_show(struct seq_file *sfp, void *data) struct hsr_priv *priv = (struct hsr_priv *)sfp->private; struct hsr_node *node; - seq_puts(sfp, "Node Table entries\n"); - seq_puts(sfp, "MAC-Address-A, MAC-Address-B, time_in[A], "); - seq_puts(sfp, "time_in[B], Address-B port\n"); + seq_printf(sfp, "Node Table entries for (%s) device\n", + (priv->prot_version == PRP_V1 ? "PRP" : "HSR")); + seq_puts(sfp, "MAC-Address-A, MAC-Address-B, time_in[A], "); + seq_puts(sfp, "time_in[B], Address-B port, "); + if (priv->prot_version == PRP_V1) + seq_puts(sfp, "SAN-A, SAN-B, DAN-P\n"); + else + seq_puts(sfp, "DAN-H\n"); + rcu_read_lock(); list_for_each_entry_rcu(node, &priv->node_db, mac_list) { /* skip self node */ if (hsr_addr_is_self(priv, node->macaddress_A)) continue; print_mac_address(sfp, &node->macaddress_A[0]); - seq_puts(sfp, " "); print_mac_address(sfp, &node->macaddress_B[0]); - seq_printf(sfp, "0x%lx, ", node->time_in[HSR_PT_SLAVE_A]); - seq_printf(sfp, "0x%lx ", node->time_in[HSR_PT_SLAVE_B]); - seq_printf(sfp, "0x%x\n", node->addr_B_port); + seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]); + seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]); + seq_printf(sfp, "%14x, ", node->addr_B_port); + + if (priv->prot_version == PRP_V1) + seq_printf(sfp, "%5x, %5x, %5x\n", + node->san_a, node->san_b, + (node->san_a == 0 && node->san_b == 0)); + else + seq_printf(sfp, "%5x\n", 1); } rcu_read_unlock(); return 0; @@ -57,7 +69,8 @@ hsr_node_table_show(struct seq_file *sfp, void *data) /* hsr_node_table_open - Open the node_table file * * Description: - * This routine opens a debugfs file node_table of specific hsr device + * This routine opens a debugfs file node_table of specific hsr + * or prp device */ static int hsr_node_table_open(struct inode *inode, struct file *filp) -- cgit From cde1a8a992875a7479c4321b2a4a190c2e92ec2a Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Sun, 26 Jul 2020 23:12:28 +0200 Subject: Bluetooth: btusb: Fix and detect most of the Chinese Bluetooth controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason they tend to squat on the very first CSR/ Cambridge Silicon Radio VID/PID instead of paying fees. This is an extremely common problem; the issue goes as back as 2013 and these devices are only getting more popular, even rebranded by reputable vendors and sold by retailers everywhere. So, at this point in time there are hundreds of modern dongles reusing the ID of what originally was an early Bluetooth 1.1 controller. Linux is the only place where they don't work due to spotty checks in our detection code. It only covered a minimum subset. So what's the big idea? Take advantage of the fact that all CSR chips report the same internal version as both the LMP sub-version and HCI revision number. It always matches, couple that with the manufacturer code, that rarely lies, and we now have a good idea of who is who. Additionally, by compiling a list of user-reported HCI/lsusb dumps, and searching around for legit CSR dongles in similar product ranges we can find what CSR BlueCore firmware supported which Bluetooth versions. That way we can narrow down ranges of fakes for each of them. e.g. Real CSR dongles with LMP subversion 0x73 are old enough that support BT 1.1 only; so it's a dead giveaway when some third-party BT 4.0 dongle reuses it. So, to sum things up; there are multiple classes of fake controllers reusing the same 0A12:0001 VID/PID. This has been broken for a while. Known 'fake' bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 IC markings on 0x7558: FR3191AHAL 749H15143 (???) https://bugzilla.kernel.org/show_bug.cgi?id=60824 Fixes: 81cac64ba258ae (Deal with USB devices that are faking CSR vendor) Reported-by: Michał Wiśniewski Tested-by: Mike Johnson Tested-by: Ricardo Rodrigues Tested-by: M.Hanny Sabbagh Tested-by: Oussama BEN BRAHIM Tested-by: Ismael Ferreras Morezuelas Signed-off-by: Ismael Ferreras Morezuelas Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6509f785dd14..2891e16c1cc1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -605,7 +605,8 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) if (hdev->commands[8] & 0x01) hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); - if (hdev->commands[18] & 0x04) + if (hdev->commands[18] & 0x04 && + !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) hci_req_add(req, HCI_OP_READ_DEF_ERR_DATA_REPORTING, 0, NULL); /* Some older Broadcom based Bluetooth 1.2 controllers do not @@ -850,7 +851,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) /* Set erroneous data reporting if supported to the wideband speech * setting value */ - if (hdev->commands[18] & 0x08) { + if (hdev->commands[18] & 0x08 && + !test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) { bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); -- cgit From 24b065727ceba53cc5bec0e725672417154df24f Mon Sep 17 00:00:00 2001 From: Max Chou Date: Thu, 23 Jul 2020 18:47:42 +0800 Subject: Bluetooth: Return NOTIFY_DONE for hci_suspend_notifier The original return is NOTIFY_STOP, but notifier_call_chain would stop the future call for register_pm_notifier even registered on other Kernel modules with the same priority which value is zero. Signed-off-by: Max Chou Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2891e16c1cc1..5394ab56c915 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3515,7 +3515,7 @@ done: bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d", action, ret); - return NOTIFY_STOP; + return NOTIFY_DONE; } /* Alloc HCI device */ -- cgit From 3c4f850e8441ac8b3b6dbaa6107604c4199ef01f Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 28 Jul 2020 01:36:04 -0400 Subject: xdp: Prevent kernel-infoleak in xsk_getsockopt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xsk_getsockopt() is copying uninitialized stack memory to userspace when 'extra_stats' is 'false'. Fix it. Doing '= {};' is sufficient since currently 'struct xdp_statistics' is defined as follows: struct xdp_statistics { __u64 rx_dropped; __u64 rx_invalid_descs; __u64 tx_invalid_descs; __u64 rx_ring_full; __u64 rx_fill_ring_empty_descs; __u64 tx_ring_empty_descs; }; When being copied to the userspace, 'stats' will not contain any uninitialized 'holes' between struct fields. Fixes: 8aa5a33578e9 ("xsk: Add new statistics") Suggested-by: Dan Carpenter Signed-off-by: Peilin Ye Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Acked-by: Song Liu Acked-by: Arnd Bergmann Link: https://lore.kernel.org/bpf/20200728053604.404631-1-yepeilin.cs@gmail.com --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2e94a7e94671..c3231620d210 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -840,7 +840,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, switch (optname) { case XDP_STATISTICS: { - struct xdp_statistics stats; + struct xdp_statistics stats = {}; bool extra_stats = true; size_t stats_size; -- cgit From 4e8c36c3b0d73d46aa27cfd4308aaa445a1067df Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Tue, 28 Jul 2020 09:58:07 -0700 Subject: Bluetooth: Fix suspend notifier race Unregister from suspend notifications and cancel suspend preparations before running hci_dev_do_close. Otherwise, the suspend notifier may race with unregister and cause cmd_timeout even after hdev has been freed. Below is the trace from when this panic was seen: [ 832.578518] Bluetooth: hci_core.c:hci_cmd_timeout() hci0: command 0x0c05 tx timeout [ 832.586200] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 832.586203] #PF: supervisor read access in kernel mode [ 832.586205] #PF: error_code(0x0000) - not-present page [ 832.586206] PGD 0 P4D 0 [ 832.586210] PM: suspend exit [ 832.608870] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 832.613232] CPU: 3 PID: 10755 Comm: kworker/3:7 Not tainted 5.4.44-04894-g1e9dbb96a161 #1 [ 832.630036] Workqueue: events hci_cmd_timeout [bluetooth] [ 832.630046] RIP: 0010:__queue_work+0xf0/0x374 [ 832.630051] RSP: 0018:ffff9b5285f1fdf8 EFLAGS: 00010046 [ 832.674033] RAX: ffff8a97681bac00 RBX: 0000000000000000 RCX: ffff8a976a000600 [ 832.681162] RDX: 0000000000000000 RSI: 0000000000000009 RDI: ffff8a976a000748 [ 832.688289] RBP: ffff9b5285f1fe38 R08: 0000000000000000 R09: ffff8a97681bac00 [ 832.695418] R10: 0000000000000002 R11: ffff8a976a0006d8 R12: ffff8a9745107600 [ 832.698045] usb 1-6: new full-speed USB device number 119 using xhci_hcd [ 832.702547] R13: ffff8a9673658850 R14: 0000000000000040 R15: 000000000000001e [ 832.702549] FS: 0000000000000000(0000) GS:ffff8a976af80000(0000) knlGS:0000000000000000 [ 832.702550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 832.702550] CR2: 0000000000000000 CR3: 000000010415a000 CR4: 00000000003406e0 [ 832.702551] Call Trace: [ 832.702558] queue_work_on+0x3f/0x68 [ 832.702562] process_one_work+0x1db/0x396 [ 832.747397] worker_thread+0x216/0x375 [ 832.751147] kthread+0x138/0x140 [ 832.754377] ? pr_cont_work+0x58/0x58 [ 832.758037] ? kthread_blkcg+0x2e/0x2e [ 832.761787] ret_from_fork+0x22/0x40 [ 832.846191] ---[ end trace fa93f466da517212 ]--- Fixes: 9952d90ea2885 ("Bluetooth: Handle PM_SUSPEND_PREPARE and PM_POST_SUSPEND") Signed-off-by: Abhishek Pandit-Subedi Reviewed-by: Miao-chen Chou Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5394ab56c915..4ba23b821cbf 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3767,9 +3767,10 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); - hci_dev_do_close(hdev); - unregister_pm_notifier(&hdev->suspend_notifier); + cancel_work_sync(&hdev->suspend_prepare); + + hci_dev_do_close(hdev); if (!test_bit(HCI_INIT, &hdev->flags) && !hci_dev_test_flag(hdev, HCI_SETUP) && -- cgit From a3ad434ad782085670d74e1a35c4c3f6043749f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:33 +0200 Subject: netfilter: arp_tables: restore a SPDX identifier This was accidentally removed in an unrelated commit. Fixes: c2f12630c60f ("netfilter: switch nf_setsockopt to sockptr_t") Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f5b26ef17820..9a1567dbc022 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1,4 +1,4 @@ - +// SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code for ARP packets. * -- cgit From d3c48151512922dd35f1f393b30b9138e4441d14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:35 +0200 Subject: net: remove sockptr_advance sockptr_advance never properly worked. Replace it with _offset variants of copy_from_sockptr and copy_to_sockptr. Fixes: ba423fdaa589 ("net: add a new sockptr_t type") Reported-by: Jason A. Donenfeld Reported-by: Ido Schimmel Signed-off-by: Christoph Hellwig Acked-by: Jason A. Donenfeld Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 ++--- net/ipv4/netfilter/arp_tables.c | 8 ++++---- net/ipv4/netfilter/ip_tables.c | 8 ++++---- net/ipv4/tcp.c | 5 +++-- net/ipv6/ip6_flowlabel.c | 11 ++++++----- net/ipv6/netfilter/ip6_tables.c | 8 ++++---- net/netfilter/x_tables.c | 7 ++++--- net/tls/tls_main.c | 6 +++--- 8 files changed, 30 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 2e9e8449698f..d148ab1530e5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -426,9 +426,8 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return -ENOMEM; sl->dccpsl_nr = optlen / sizeof(u32) - 1; - sockptr_advance(optval, sizeof(service)); - if (copy_from_sockptr(sl->dccpsl_list, optval, - optlen - sizeof(service)) || + if (copy_from_sockptr_offset(sl->dccpsl_list, optval, + sizeof(service), optlen - sizeof(service)) || dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { kfree(sl); return -EFAULT; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 9a1567dbc022..d1e04d2b5170 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -971,8 +971,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1267,8 +1267,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f2a9680303d8..f15bc21d7301 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1126,8 +1126,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1508,8 +1508,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27de9380ed14..4afec552f211 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2801,12 +2801,13 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, { struct tcp_sock *tp = tcp_sk(sk); struct tcp_repair_opt opt; + size_t offset = 0; while (len >= sizeof(opt)) { - if (copy_from_sockptr(&opt, optbuf, sizeof(opt))) + if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt))) return -EFAULT; - sockptr_advance(optbuf, sizeof(opt)); + offset += sizeof(opt); len -= sizeof(opt); switch (opt.opt_code) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 215b6f5e733e..2d655260dedc 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -401,8 +401,8 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, memset(fl->opt, 0, sizeof(*fl->opt)); fl->opt->tot_len = sizeof(*fl->opt) + olen; err = -EFAULT; - sockptr_advance(optval, CMSG_ALIGN(sizeof(*freq))); - if (copy_from_sockptr(fl->opt + 1, optval, olen)) + if (copy_from_sockptr_offset(fl->opt + 1, optval, + CMSG_ALIGN(sizeof(*freq)), olen)) goto done; msg.msg_controllen = olen; @@ -703,9 +703,10 @@ release: goto recheck; if (!freq->flr_label) { - sockptr_advance(optval, - offsetof(struct in6_flowlabel_req, flr_label)); - if (copy_to_sockptr(optval, &fl->label, sizeof(fl->label))) { + size_t offset = offsetof(struct in6_flowlabel_req, flr_label); + + if (copy_to_sockptr_offset(optval, offset, &fl->label, + sizeof(fl->label))) { /* Intentionally ignore fault. */ } } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1d52957a413f..2e2119bfcf13 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1143,8 +1143,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } @@ -1517,8 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) return -ENOMEM; loc_cpu_entry = newinfo->entries; - sockptr_advance(arg, sizeof(tmp)); - if (copy_from_sockptr(loc_cpu_entry, arg, tmp.size) != 0) { + if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), + tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b97eb4b538fd..91bf6635ea9e 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1050,6 +1050,7 @@ EXPORT_SYMBOL_GPL(xt_check_target); void *xt_copy_counters(sockptr_t arg, unsigned int len, struct xt_counters_info *info) { + size_t offset; void *mem; u64 size; @@ -1067,7 +1068,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len, memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; - sockptr_advance(arg, sizeof(compat_tmp)); + offset = sizeof(compat_tmp); } else #endif { @@ -1078,7 +1079,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len, if (copy_from_sockptr(info, arg, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - sockptr_advance(arg, sizeof(*info)); + offset = sizeof(*info); } info->name[sizeof(info->name) - 1] = '\0'; @@ -1092,7 +1093,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len, if (!mem) return ERR_PTR(-ENOMEM); - if (copy_from_sockptr(mem, arg, len) == 0) + if (copy_from_sockptr_offset(mem, arg, offset, len) == 0) return mem; vfree(mem); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index d77f7d821130..bbc52b088d29 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -522,9 +522,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - sockptr_advance(optval, sizeof(*crypto_info)); - rc = copy_from_sockptr(crypto_info + 1, optval, - optlen - sizeof(*crypto_info)); + rc = copy_from_sockptr_offset(crypto_info + 1, optval, + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; -- cgit From a31edb2059ed4e498f9aa8230c734b59d0ad797a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jul 2020 18:38:36 +0200 Subject: net: improve the user pointer check in init_user_sockptr Make sure not just the pointer itself but the whole range lies in the user address space. For that pass the length and then use the access_ok helper to do the check. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/ipv4/bpfilter/sockopt.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 94f18d2352d0..545b2640f019 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -65,7 +65,7 @@ int bpfilter_ip_get_sockopt(struct sock *sk, int optname, if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval); + err = init_user_sockptr(&optval, user_optval, len); if (err) return err; return bpfilter_mbox_request(sk, optname, optval, len, false); diff --git a/net/socket.c b/net/socket.c index 94ca4547cd7c..aff52e81653c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2105,7 +2105,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval); + err = init_user_sockptr(&optval, user_optval, optlen); if (err) return err; -- cgit From 0bac966a1f2ae0e3cbc259c5bb10aab7bbcf8f4b Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:11:59 -0700 Subject: mptcp: Allow DATA_FIN in headers without TCP FIN RFC 8684-compliant DATA_FIN needs to be sent and ack'd before subflows are closed with TCP FIN, so write DATA_FIN DSS headers whenever their transmission has been enabled by the MPTCP connection-level socket. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 3bc56eb608d8..0b122b2a9c69 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -482,17 +482,10 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, struct mptcp_sock *msk; unsigned int ack_size; bool ret = false; - u8 tcp_fin; - if (skb) { - mpext = mptcp_get_ext(skb); - tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - } else { - mpext = NULL; - tcp_fin = 0; - } + mpext = skb ? mptcp_get_ext(skb) : NULL; - if (!skb || (mpext && mpext->use_map) || tcp_fin) { + if (!skb || (mpext && mpext->use_map) || subflow->data_fin_tx_enable) { unsigned int map_size; map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; @@ -502,7 +495,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (mpext) opts->ext_copy = *mpext; - if (skb && tcp_fin && subflow->data_fin_tx_enable) + if (skb && subflow->data_fin_tx_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; } -- cgit From 57baaf2875404b555587391608da1625863086fa Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:00 -0700 Subject: mptcp: Return EPIPE if sending is shut down during a sendmsg A MPTCP socket where sending has been shut down should not attempt to send additional data, since DATA_FIN has already been sent. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2891ae8a1028..b3c3dbc89b3f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -748,6 +748,11 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) restart: mptcp_clean_una(sk); + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { + ret = -EPIPE; + goto out; + } + wait_for_sndbuf: __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); -- cgit From 242e63f651e94da5fa3cbe6ae0a62dd219226418 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:01 -0700 Subject: mptcp: Remove outdated and incorrect comment mptcp_close() acquires the msk lock, so it clearly should not be held before the function is called. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b3c3dbc89b3f..7d7e0fa17219 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1421,7 +1421,6 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how, release_sock(ssk); } -/* Called with msk lock held, releases such lock before returning */ static void mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow, *tmp; -- cgit From 7279da6145bbb2e41a61def5d9bca5b65f12de9d Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:02 -0700 Subject: mptcp: Use MPTCP-level flag for sending DATA_FIN Since DATA_FIN information is the same for every subflow, store it only in the mptcp_sock. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 18 ++++++++++++------ net/mptcp/protocol.c | 21 +++++---------------- net/mptcp/protocol.h | 3 +-- 3 files changed, 18 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 0b122b2a9c69..f157cb7e14c0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -451,6 +451,8 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, struct sk_buff *skb, struct mptcp_ext *ext) { + u64 data_fin_tx_seq = READ_ONCE(mptcp_sk(subflow->conn)->write_seq); + if (!ext->use_map || !skb->len) { /* RFC6824 requires a DSS mapping with specific values * if DATA_FIN is set but no data payload is mapped @@ -458,10 +460,13 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, ext->data_fin = 1; ext->use_map = 1; ext->dsn64 = 1; - ext->data_seq = subflow->data_fin_tx_seq; + /* The write_seq value has already been incremented, so + * the actual sequence number for the DATA_FIN is one less. + */ + ext->data_seq = data_fin_tx_seq - 1; ext->subflow_seq = 0; ext->data_len = 1; - } else if (ext->data_seq + ext->data_len == subflow->data_fin_tx_seq) { + } else if (ext->data_seq + ext->data_len == data_fin_tx_seq) { /* If there's an existing DSS mapping and it is the * final mapping, DATA_FIN consumes 1 additional byte of * mapping space. @@ -477,15 +482,17 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int dss_size = 0; + u64 snd_data_fin_enable; struct mptcp_ext *mpext; - struct mptcp_sock *msk; unsigned int ack_size; bool ret = false; mpext = skb ? mptcp_get_ext(skb) : NULL; + snd_data_fin_enable = READ_ONCE(msk->snd_data_fin_enable); - if (!skb || (mpext && mpext->use_map) || subflow->data_fin_tx_enable) { + if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size; map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; @@ -495,7 +502,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (mpext) opts->ext_copy = *mpext; - if (skb && subflow->data_fin_tx_enable) + if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; } @@ -504,7 +511,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, * if the first subflow may have the already the remote key handy */ opts->ext_copy.use_ack = 0; - msk = mptcp_sk(subflow->conn); if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7d7e0fa17219..dd403ba3679a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1391,8 +1391,7 @@ static void mptcp_cancel_work(struct sock *sk) sock_put(sk); } -static void mptcp_subflow_shutdown(struct sock *ssk, int how, - bool data_fin_tx_enable, u64 data_fin_tx_seq) +static void mptcp_subflow_shutdown(struct sock *ssk, int how) { lock_sock(ssk); @@ -1405,14 +1404,6 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how, tcp_disconnect(ssk, O_NONBLOCK); break; default: - if (data_fin_tx_enable) { - struct mptcp_subflow_context *subflow; - - subflow = mptcp_subflow_ctx(ssk); - subflow->data_fin_tx_seq = data_fin_tx_seq; - subflow->data_fin_tx_enable = 1; - } - ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); break; @@ -1426,7 +1417,6 @@ static void mptcp_close(struct sock *sk, long timeout) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); LIST_HEAD(conn_list); - u64 data_fin_tx_seq; lock_sock(sk); @@ -1440,7 +1430,7 @@ static void mptcp_close(struct sock *sk, long timeout) spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); - data_fin_tx_seq = msk->write_seq; + msk->snd_data_fin_enable = 1; __mptcp_clear_xmit(sk); @@ -1448,9 +1438,6 @@ static void mptcp_close(struct sock *sk, long timeout) list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - subflow->data_fin_tx_seq = data_fin_tx_seq; - subflow->data_fin_tx_enable = 1; __mptcp_close_ssk(sk, ssk, subflow, timeout); } @@ -2146,10 +2133,12 @@ static int mptcp_shutdown(struct socket *sock, int how) } __mptcp_flush_join_list(msk); + msk->snd_data_fin_enable = 1; + mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq); + mptcp_subflow_shutdown(tcp_sk, how); } /* Wake up anyone sleeping in poll. */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 67634b595466..3f49cc105772 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -199,6 +199,7 @@ struct mptcp_sock { unsigned long flags; bool can_ack; bool fully_established; + bool snd_data_fin_enable; spinlock_t join_list_lock; struct work_struct work; struct list_head conn_list; @@ -291,10 +292,8 @@ struct mptcp_subflow_context { backup : 1, data_avail : 1, rx_eof : 1, - data_fin_tx_enable : 1, use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ can_ack : 1; /* only after processing the remote a key */ - u64 data_fin_tx_seq; u32 remote_nonce; u64 thmac; u32 local_nonce; -- cgit From 3721b9b64676b3377a966f3d96acafd70bb32dd9 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:03 -0700 Subject: mptcp: Track received DATA_FIN sequence number and add related helpers Incoming DATA_FIN headers need to propagate the presence of the DATA_FIN bit and the associated sequence number to the MPTCP layer, even when arriving on a bare ACK that does not get added to the receive queue. Add structure members to store the DATA_FIN information and helpers to set and check those values. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 16 ++++++++ net/mptcp/protocol.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++----- net/mptcp/protocol.h | 3 ++ 3 files changed, 115 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index f157cb7e14c0..38583d1b9b5f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -782,6 +782,22 @@ static void update_una(struct mptcp_sock *msk, } } +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) +{ + /* Skip if DATA_FIN was already received. + * If updating simultaneously with the recvmsg loop, values + * should match. If they mismatch, the peer is misbehaving and + * we will prefer the most recent information. + */ + if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first)) + return false; + + WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq); + WRITE_ONCE(msk->rcv_data_fin, 1); + + return true; +} + static bool add_addr_hmac_valid(struct mptcp_sock *msk, struct mptcp_options_received *mp_opt) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dd403ba3679a..e1c71bfd61a3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -16,6 +16,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include #endif @@ -163,6 +164,101 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk, return mptcp_subflow_data_available(ssk); } +static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (READ_ONCE(msk->rcv_data_fin) && + ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) { + u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq); + + if (msk->ack_seq == rcv_data_fin_seq) { + if (seq) + *seq = rcv_data_fin_seq; + + return true; + } + } + + return false; +} + +static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) +{ + long tout = ssk && inet_csk(ssk)->icsk_pending ? + inet_csk(ssk)->icsk_timeout - jiffies : 0; + + if (tout <= 0) + tout = mptcp_sk(sk)->timer_ival; + mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; +} + +static void mptcp_check_data_fin(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + u64 rcv_data_fin_seq; + + if (__mptcp_check_fallback(msk) || !msk->first) + return; + + /* Need to ack a DATA_FIN received from a peer while this side + * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. + * msk->rcv_data_fin was set when parsing the incoming options + * at the subflow level and the msk lock was not held, so this + * is the first opportunity to act on the DATA_FIN and change + * the msk state. + * + * If we are caught up to the sequence number of the incoming + * DATA_FIN, send the DATA_ACK now and do state transition. If + * not caught up, do nothing and let the recv code send DATA_ACK + * when catching up. + */ + + if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) { + struct mptcp_subflow_context *subflow; + + msk->ack_seq++; + WRITE_ONCE(msk->rcv_data_fin, 0); + + sk->sk_shutdown |= RCV_SHUTDOWN; + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + inet_sk_state_store(sk, TCP_CLOSE_WAIT); + break; + case TCP_FIN_WAIT1: + inet_sk_state_store(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + inet_sk_state_store(sk, TCP_CLOSE); + // @@ Close subflows now? + break; + default: + /* Other states not expected */ + WARN_ON_ONCE(1); + break; + } + + mptcp_set_timeout(sk, NULL); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + tcp_send_ack(ssk); + release_sock(ssk); + } + + sk->sk_state_change(sk); + + if (sk->sk_shutdown == SHUTDOWN_MASK || + sk->sk_state == TCP_CLOSE) + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + else + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + } +} + static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sock *ssk, unsigned int *bytes) @@ -303,16 +399,6 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk) spin_unlock_bh(&msk->join_list_lock); } -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) -{ - long tout = ssk && inet_csk(ssk)->icsk_pending ? - inet_csk(ssk)->icsk_timeout - jiffies : 0; - - if (tout <= 0) - tout = mptcp_sk(sk)->timer_ival; - mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; -} - static bool mptcp_timer_pending(struct sock *sk) { return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3f49cc105772..beb34b8a5363 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -193,12 +193,14 @@ struct mptcp_sock { u64 remote_key; u64 write_seq; u64 ack_seq; + u64 rcv_data_fin_seq; atomic64_t snd_una; unsigned long timer_ival; u32 token; unsigned long flags; bool can_ack; bool fully_established; + bool rcv_data_fin; bool snd_data_fin_enable; spinlock_t join_list_lock; struct work_struct work; @@ -385,6 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq); void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) -- cgit From 6920b851584cc69a61ebf2cff3948bb153bcef20 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:04 -0700 Subject: mptcp: Add mptcp_close_state() helper This will be used to transition to the appropriate state on close and determine if a DATA_FIN needs to be sent for that state transition. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e1c71bfd61a3..51370b69e30b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1498,6 +1498,33 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how) release_sock(ssk); } +static const unsigned char new_state[16] = { + /* current state: new state: action: */ + [0 /* (Invalid) */] = TCP_CLOSE, + [TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_SYN_SENT] = TCP_CLOSE, + [TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, + [TCP_FIN_WAIT1] = TCP_FIN_WAIT1, + [TCP_FIN_WAIT2] = TCP_FIN_WAIT2, + [TCP_TIME_WAIT] = TCP_CLOSE, /* should not happen ! */ + [TCP_CLOSE] = TCP_CLOSE, + [TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, + [TCP_LAST_ACK] = TCP_LAST_ACK, + [TCP_LISTEN] = TCP_CLOSE, + [TCP_CLOSING] = TCP_CLOSING, + [TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! */ +}; + +static int mptcp_close_state(struct sock *sk) +{ + int next = (int)new_state[sk->sk_state]; + int ns = next & TCP_STATE_MASK; + + inet_sk_state_store(sk, ns); + + return next & TCP_ACTION_FIN; +} + static void mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow, *tmp; -- cgit From 16a9a9da17234797b01ca05024d33269872a0ae0 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:05 -0700 Subject: mptcp: Add helper to process acks of DATA_FIN After DATA_FIN has been sent, the peer will acknowledge it. An ack of the relevant MPTCP-level sequence number will update the MPTCP connection state appropriately. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 51370b69e30b..b3350830e14d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -143,6 +143,14 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, MPTCP_SKB_CB(skb)->offset = offset; } +static void mptcp_stop_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + mptcp_sk(sk)->timer_ival = 0; +} + /* both sockets must be locked */ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk, struct sock *ssk) @@ -164,6 +172,42 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk, return mptcp_subflow_data_available(ssk); } +static void mptcp_check_data_fin_ack(struct sock *sk) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + if (__mptcp_check_fallback(msk)) + return; + + /* Look for an acknowledged DATA_FIN */ + if (((1 << sk->sk_state) & + (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && + msk->write_seq == atomic64_read(&msk->snd_una)) { + mptcp_stop_timer(sk); + + WRITE_ONCE(msk->snd_data_fin_enable, 0); + + switch (sk->sk_state) { + case TCP_FIN_WAIT1: + inet_sk_state_store(sk, TCP_FIN_WAIT2); + sk->sk_state_change(sk); + break; + case TCP_CLOSING: + fallthrough; + case TCP_LAST_ACK: + inet_sk_state_store(sk, TCP_CLOSE); + sk->sk_state_change(sk); + break; + } + + if (sk->sk_shutdown == SHUTDOWN_MASK || + sk->sk_state == TCP_CLOSE) + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + else + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + } +} + static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -222,6 +266,8 @@ static void mptcp_check_data_fin(struct sock *sk) WRITE_ONCE(msk->rcv_data_fin, 0); sk->sk_shutdown |= RCV_SHUTDOWN; + smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ + set_bit(MPTCP_DATA_READY, &msk->flags); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -455,14 +501,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) } } -static void mptcp_stop_timer(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); - mptcp_sk(sk)->timer_ival = 0; -} - static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) { const struct sock *sk = (const struct sock *)msk; -- cgit From 43b54c6ee382f026fc93babf5301ec79e1c9614a Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:06 -0700 Subject: mptcp: Use full MPTCP-level disconnect state machine RFC 8684 appendix D describes the connection state machine for MPTCP. This patch implements the DATA_FIN / DATA_ACK exchanges and MPTCP-level socket state changes described in that appendix, rather than simply sending DATA_FIN along with TCP FIN when disconnecting subflows. DATA_FIN is now sent and acknowledged before shutting down the subflows. Received DATA_FIN information (if not part of a data packet) is written to the MPTCP socket when the incoming DSS option is parsed by the subflow, and the MPTCP worker is scheduled to process the flag. DATA_FIN received as part of a full DSS mapping will be handled when the mapping is processed. The DATA_FIN is acknowledged by the worker if the reader is caught up. If there is still data to be moved to the MPTCP-level queue, ack_seq will be incremented to account for the DATA_FIN when it reaches the end of the stream and a DATA_ACK will be sent to the peer. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 11 +++++++ net/mptcp/protocol.c | 87 +++++++++++++++++++++++++++++++++++++++++++--------- net/mptcp/subflow.c | 11 +++++-- 3 files changed, 92 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 38583d1b9b5f..b4458ecd01f8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -868,6 +868,17 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, if (mp_opt.use_ack) update_una(msk, &mp_opt); + /* Zero-length packets, like bare ACKs carrying a DATA_FIN, are + * dropped by the caller and not propagated to the MPTCP layer. + * Copy the DATA_FIN information now. + */ + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { + if (mp_opt.data_fin && mp_opt.data_len == 1 && + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) && + schedule_work(&msk->work)) + sock_hold(subflow->conn); + } + mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b3350830e14d..f264ea15e081 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -381,6 +381,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, *bytes = moved; + /* If the moves have caught up with the DATA_FIN sequence number + * it's time to ack the DATA_FIN and change socket state, but + * this is not a good place to change state. Let the workqueue + * do it. + */ + if (mptcp_pending_data_fin(sk, NULL) && + schedule_work(&msk->work)) + sock_hold(sk); + return done; } @@ -466,7 +475,8 @@ void mptcp_data_acked(struct sock *sk) { mptcp_reset_timer(sk); - if (!sk_stream_is_writeable(sk) && + if ((!sk_stream_is_writeable(sk) || + (inet_sk_state_load(sk) != TCP_ESTABLISHED)) && schedule_work(&mptcp_sk(sk)->work)) sock_hold(sk); } @@ -1384,6 +1394,7 @@ static void mptcp_worker(struct work_struct *work) lock_sock(sk); mptcp_clean_una(sk); + mptcp_check_data_fin_ack(sk); __mptcp_flush_join_list(msk); __mptcp_move_skbs(msk); @@ -1393,6 +1404,8 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); + mptcp_check_data_fin(sk); + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; @@ -1515,7 +1528,7 @@ static void mptcp_cancel_work(struct sock *sk) sock_put(sk); } -static void mptcp_subflow_shutdown(struct sock *ssk, int how) +static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) { lock_sock(ssk); @@ -1528,8 +1541,15 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how) tcp_disconnect(ssk, O_NONBLOCK); break; default: - ssk->sk_shutdown |= how; - tcp_shutdown(ssk, how); + if (__mptcp_check_fallback(mptcp_sk(sk))) { + pr_debug("Fallback"); + ssk->sk_shutdown |= how; + tcp_shutdown(ssk, how); + } else { + pr_debug("Sending DATA_FIN on subflow %p", ssk); + mptcp_set_timeout(sk, ssk); + tcp_send_ack(ssk); + } break; } @@ -1570,9 +1590,35 @@ static void mptcp_close(struct sock *sk, long timeout) LIST_HEAD(conn_list); lock_sock(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + if (sk->sk_state == TCP_LISTEN) { + inet_sk_state_store(sk, TCP_CLOSE); + goto cleanup; + } else if (sk->sk_state == TCP_CLOSE) { + goto cleanup; + } + + if (__mptcp_check_fallback(msk)) { + goto update_state; + } else if (mptcp_close_state(sk)) { + pr_debug("Sending DATA_FIN sk=%p", sk); + WRITE_ONCE(msk->write_seq, msk->write_seq + 1); + WRITE_ONCE(msk->snd_data_fin_enable, 1); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); + + mptcp_subflow_shutdown(sk, tcp_sk, SHUTDOWN_MASK); + } + } + sk_stream_wait_close(sk, timeout); + +update_state: inet_sk_state_store(sk, TCP_CLOSE); +cleanup: /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -1581,8 +1627,6 @@ static void mptcp_close(struct sock *sk, long timeout) spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); - msk->snd_data_fin_enable = 1; - __mptcp_clear_xmit(sk); release_sock(sk); @@ -2265,11 +2309,8 @@ static int mptcp_shutdown(struct socket *sock, int how) pr_debug("sk=%p, how=%d", msk, how); lock_sock(sock->sk); - if (how == SHUT_WR || how == SHUT_RDWR) - inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); how++; - if ((how & ~SHUTDOWN_MASK) || !how) { ret = -EINVAL; goto out_unlock; @@ -2283,13 +2324,31 @@ static int mptcp_shutdown(struct socket *sock, int how) sock->state = SS_CONNECTED; } - __mptcp_flush_join_list(msk); - msk->snd_data_fin_enable = 1; + /* If we've already sent a FIN, or it's a closed state, skip this. */ + if (__mptcp_check_fallback(msk)) { + if (how == SHUT_WR || how == SHUT_RDWR) + inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); - mptcp_for_each_subflow(msk, subflow) { - struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); + mptcp_for_each_subflow(msk, subflow) { + struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); - mptcp_subflow_shutdown(tcp_sk, how); + mptcp_subflow_shutdown(sock->sk, tcp_sk, how); + } + } else if ((how & SEND_SHUTDOWN) && + ((1 << sock->sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) && + mptcp_close_state(sock->sk)) { + __mptcp_flush_join_list(msk); + + WRITE_ONCE(msk->write_seq, msk->write_seq + 1); + WRITE_ONCE(msk->snd_data_fin_enable, 1); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); + + mptcp_subflow_shutdown(sock->sk, tcp_sk, how); + } } /* Wake up anyone sleeping in poll. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e645483d1200..7ab2a52ad150 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -598,7 +598,8 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) return true; } -static enum mapping_status get_mapping_status(struct sock *ssk) +static enum mapping_status get_mapping_status(struct sock *ssk, + struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_ext *mpext; @@ -648,7 +649,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (mpext->data_fin == 1) { if (data_len == 1) { - pr_debug("DATA_FIN with no payload"); + mptcp_update_rcv_data_fin(msk, mpext->data_seq); + pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS * option before the previous mapping @@ -660,6 +662,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk) } else { return MAPPING_DATA_FIN; } + } else { + mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len); + pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len); } /* Adjust for DATA_FIN using 1 byte of sequence space */ @@ -748,7 +753,7 @@ static bool subflow_check_data_avail(struct sock *ssk) u64 ack_seq; u64 old_ack; - status = get_mapping_status(ssk); + status = get_mapping_status(ssk, msk); pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status); if (status == MAPPING_INVALID) { ssk->sk_err = EBADMSG; -- cgit From 067a0b3dc52f0f79b9fe64ff8d9bcbb0ffbcf8fc Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:07 -0700 Subject: mptcp: Only use subflow EOF signaling on fallback connections The MPTCP state machine handles disconnections on non-fallback connections, but the mptcp_sock still needs to get notified when fallback subflows disconnect. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 7ab2a52ad150..1c8482bc2ce5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1159,7 +1159,8 @@ static void subflow_state_change(struct sock *sk) if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); - if (!(parent->sk_shutdown & RCV_SHUTDOWN) && + if (__mptcp_check_fallback(mptcp_sk(parent)) && + !(parent->sk_shutdown & RCV_SHUTDOWN) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; mptcp_subflow_eof(parent); -- cgit From 06827b348b1d43850a63c3e490fe9712c124fa0c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:08 -0700 Subject: mptcp: Skip unnecessary skb extension allocation for bare acks Bare TCP ack skbs are freed right after MPTCP sees them, so the work to allocate, zero, and populate the MPTCP skb extension is wasted. Detect these skbs and do not add skb extensions to them. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index b4458ecd01f8..7fa822b55c34 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -868,15 +868,18 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, if (mp_opt.use_ack) update_una(msk, &mp_opt); - /* Zero-length packets, like bare ACKs carrying a DATA_FIN, are - * dropped by the caller and not propagated to the MPTCP layer. - * Copy the DATA_FIN information now. + /* Zero-data-length packets are dropped by the caller and not + * propagated to the MPTCP layer, so the skb extension does not + * need to be allocated or populated. DATA_FIN information, if + * present, needs to be updated here before the skb is freed. */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) && schedule_work(&msk->work)) sock_hold(subflow->conn); + + return; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); -- cgit From c75293925f24630326abdf79751d980ec3878f65 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:09 -0700 Subject: mptcp: Safely read sequence number when lock isn't held The MPTCP socket's write_seq member should be read with READ_ONCE() when the msk lock is not held. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f264ea15e081..f2455a68d231 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1269,7 +1269,7 @@ static void mptcp_retransmit_handler(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (atomic64_read(&msk->snd_una) == msk->write_seq) { + if (atomic64_read(&msk->snd_una) == READ_ONCE(msk->write_seq)) { mptcp_stop_timer(sk); } else { set_bit(MPTCP_WORK_RTX, &msk->flags); -- cgit From 721e9089905ab7aebd5364b86b5f068f632a0e49 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 28 Jul 2020 15:12:10 -0700 Subject: mptcp: Safely store sequence number when sending data The MPTCP socket's write_seq member can be read without the msk lock held, so use WRITE_ONCE() to store it. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f2455a68d231..687f0bea2b35 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -793,7 +793,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, out: if (!retransmission) pfrag->offset += frag_truesize; - *write_seq += ret; + WRITE_ONCE(*write_seq, *write_seq + ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; -- cgit From 608b4adab17889576c6636311f495b1a52418c69 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 25 Jul 2020 13:17:07 -0700 Subject: net_sched: initialize timer earlier in red_init() When red_init() fails, red_destroy() is called to clean up. If the timer is not initialized yet, del_timer_sync() will complain. So we have to move timer_setup() before any failure. Reported-and-tested-by: syzbot+6e95a4fabf88dc217145@syzkaller.appspotmail.com Fixes: aee9caa03fc3 ("net: sched: sch_red: Add qevents "early_drop" and "mark"") Cc: Petr Machata Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- net/sched/sch_red.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 4cc0ad0b1189..deac82f3ad7b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -333,6 +333,10 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_RED_MAX + 1]; int err; + q->qdisc = &noop_qdisc; + q->sch = sch; + timer_setup(&q->adapt_timer, red_adaptative_timer, 0); + if (!opt) return -EINVAL; @@ -341,10 +345,6 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - q->qdisc = &noop_qdisc; - q->sch = sch; - timer_setup(&q->adapt_timer, red_adaptative_timer, 0); - err = __red_change(sch, tb, extack); if (err) return err; -- cgit From b9aaec8f0be518096d1377082e0abe6a85e86ff3 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Sun, 26 Jul 2020 15:48:16 -0700 Subject: fib: use indirect call wrappers in the most common fib_rules_ops This avoids another inderect call per RX packet which save us around 20-40 ns. Changelog: v1 -> v2: - Move declaraions to fib_rules.h to remove warnings Reported-by: kernel test robot Signed-off-by: Brian Vazquez Signed-off-by: David S. Miller --- net/core/fib_rules.c | 18 ++++++++++++++---- net/ipv4/fib_rules.c | 12 ++++++++---- net/ipv6/fib6_rules.c | 12 ++++++++---- 3 files changed, 30 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bd7eba9066f8..e7a8f87b0bb2 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -14,6 +14,7 @@ #include #include #include +#include static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), @@ -267,7 +268,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; - ret = ops->match(rule, fl, flags); + ret = INDIRECT_CALL_INET(ops->match, + fib6_rule_match, + fib4_rule_match, + rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; } @@ -298,9 +302,15 @@ jumped: } else if (rule->action == FR_ACT_NOP) continue; else - err = ops->action(rule, fl, flags, arg); - - if (!err && ops->suppress && ops->suppress(rule, arg)) + err = INDIRECT_CALL_INET(ops->action, + fib6_rule_action, + fib4_rule_action, + rule, fl, flags, arg); + + if (!err && ops->suppress && INDIRECT_CALL_INET(ops->suppress, + fib6_rule_suppress, + fib4_rule_suppress, + rule, arg)) continue; if (err != -EAGAIN) { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f99e3bac5cab..ce54a30c2ef1 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -29,6 +29,7 @@ #include #include #include +#include struct fib4_rule { struct fib_rule common; @@ -103,8 +104,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, } EXPORT_SYMBOL_GPL(__fib_lookup); -static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) +INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule, + struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { int err = -EAGAIN; struct fib_table *tbl; @@ -138,7 +140,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, return err; } -static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, + struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; struct net_device *dev = NULL; @@ -169,7 +172,8 @@ suppress_route: return true; } -static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, + struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; struct flowi4 *fl4 = &fl->u.ip4; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 6053ef851555..8f9a83314de7 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -255,8 +256,9 @@ out: return err; } -static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) +INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule, + struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { if (arg->lookup_ptr == fib6_table_lookup) return fib6_rule_action_alt(rule, flp, flags, arg); @@ -264,7 +266,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, return __fib6_rule_action(rule, flp, flags, arg); } -static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) +INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, + struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; struct rt6_info *rt = res->rt6; @@ -296,7 +299,8 @@ suppress_route: return true; } -static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, + struct flowi *fl, int flags) { struct fib6_rule *r = (struct fib6_rule *) rule; struct flowi6 *fl6 = &fl->u.ip6; -- cgit From 50935339c394adfb3d7253055e3bc10ee70264b0 Mon Sep 17 00:00:00 2001 From: "Alexander A. Klimov" Date: Sat, 25 Jul 2020 19:02:25 +0200 Subject: netfilter: Replace HTTP links with HTTPS ones Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. Signed-off-by: Alexander A. Klimov Signed-off-by: Pablo Neira Ayuso --- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/netfilter/Kconfig | 2 +- net/netfilter/nfnetlink_acct.c | 2 +- net/netfilter/nft_set_pipapo.c | 4 ++-- net/netfilter/xt_CONNSECMARK.c | 2 +- net/netfilter/xt_connmark.c | 2 +- net/netfilter/xt_nfacct.c | 2 +- net/netfilter/xt_time.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index dc705769acc9..26a9193df783 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -6,7 +6,7 @@ * * DECnet Routing Message Grabulator * - * (C) 2000 ChyGwyn Limited - http://www.chygwyn.com/ + * (C) 2000 ChyGwyn Limited - https://www.chygwyn.com/ * * Author: Steven Whitehouse */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0ffe2b8723c4..25313c29d799 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -447,7 +447,7 @@ config NF_TABLES replace the existing {ip,ip6,arp,eb}_tables infrastructure. It provides a pseudo-state machine with an extensible instruction-set (also known as expressions) that the userspace 'nft' utility - (http://www.netfilter.org/projects/nftables) uses to build the + (https://www.netfilter.org/projects/nftables) uses to build the rule-set. It also comes with the generic set infrastructure that allows you to construct mappings between matchings and actions for performance lookups. diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 5827117f2635..5bfec829c12f 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2011 Pablo Neira Ayuso - * (C) 2011 Intra2net AG + * (C) 2011 Intra2net AG */ #include #include diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index cc6082a5f7ad..9944523f5c2c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -312,7 +312,7 @@ * Jay Ligatti, Josh Kuhn, and Chris Gage. * Proceedings of the IEEE International Conference on Computer * Communication Networks (ICCCN), August 2010. - * http://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf + * https://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf * * [Rottenstreich 2010] * Worst-Case TCAM Rule Expansion @@ -325,7 +325,7 @@ * Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane, * and Patrick Eugster. * Proceedings of the 2014 ACM conference on SIGCOMM, August 2014. - * http://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf + * https://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf */ #include diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index a5c8b653476a..76acecf3e757 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -6,7 +6,7 @@ * with the SECMARK target and state match. * * Based somewhat on CONNMARK: - * Copyright (C) 2002,2004 MARA Systems AB + * Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom * * (C) 2006,2008 Red Hat, Inc., James Morris diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index eec2f3a88d73..e5ebc0810675 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -2,7 +2,7 @@ /* * xt_connmark - Netfilter module to operate on connection marks * - * Copyright (C) 2002,2004 MARA Systems AB + * Copyright (C) 2002,2004 MARA Systems AB * by Henrik Nordstrom * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * Jan Engelhardt diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 5aab6df74e0f..a97c2259bbc8 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2011 Pablo Neira Ayuso - * (C) 2011 Intra2net AG + * (C) 2011 Intra2net AG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 67cb98489415..6aa12d0f54e2 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -5,7 +5,7 @@ * based on ipt_time by Fabrice MARIE * This is a module which is used for time matching * It is using some modified code from dietlibc (localtime() function) - * that you can find at http://www.fefe.de/dietlibc/ + * that you can find at https://www.fefe.de/dietlibc/ * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. */ -- cgit From 42f36eba71c4f0c532d6de761c154d00f9f1900d Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Sun, 26 Jul 2020 21:52:05 -0400 Subject: netfilter: ip6tables: Remove redundant null checks Remove superfluous check for NULL pointer to header. Signed-off-by: Gaurav Singh Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_ah.c | 3 +-- net/ipv6/netfilter/ip6t_frag.c | 3 +-- net/ipv6/netfilter/ip6t_hbh.c | 3 +-- net/ipv6/netfilter/ip6t_rt.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 4e15a14435e4..70da2f2ce064 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -74,8 +74,7 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) ahinfo->hdrres, ah->reserved, !(ahinfo->hdrres && ah->reserved)); - return (ah != NULL) && - spi_match(ahinfo->spis[0], ahinfo->spis[1], + return spi_match(ahinfo->spis[0], ahinfo->spis[1], ntohl(ah->spi), !!(ahinfo->invflags & IP6T_AH_INV_SPI)) && (!ahinfo->hdrlen || diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index fb91eeee4a1e..3aad6439386b 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -85,8 +85,7 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) !((fraginfo->flags & IP6T_FRAG_NMF) && (ntohs(fh->frag_off) & IP6_MF))); - return (fh != NULL) && - id_match(fraginfo->ids[0], fraginfo->ids[1], + return id_match(fraginfo->ids[0], fraginfo->ids[1], ntohl(fh->identification), !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) && !((fraginfo->flags & IP6T_FRAG_RES) && diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 467b2a86031b..e7a3fb9355ee 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -86,8 +86,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) ((optinfo->hdrlen == hdrlen) ^ !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); - ret = (oh != NULL) && - (!(optinfo->flags & IP6T_OPTS_LEN) || + ret = (!(optinfo->flags & IP6T_OPTS_LEN) || ((optinfo->hdrlen == hdrlen) ^ !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index f633dc84ca3f..733c83d38b30 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -89,8 +89,7 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) !((rtinfo->flags & IP6T_RT_RES) && (((const struct rt0_hdr *)rh)->reserved))); - ret = (rh != NULL) && - (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], + ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && (!(rtinfo->flags & IP6T_RT_LEN) || -- cgit From 41d707b7332f1386642c47eb078110ca368a46f5 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Wed, 29 Jul 2020 11:10:18 -0700 Subject: fib: fix fib_rules_ops indirect calls wrappers This patch fixes: commit b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops") which didn't consider the case when CONFIG_IPV6_MULTIPLE_TABLES is not set. Reported-by: Stephen Rothwell Fixes: b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops") Signed-off-by: Brian Vazquez Signed-off-by: David S. Miller --- net/core/fib_rules.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7a8f87b0bb2..fce645f6b9b1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,6 +16,13 @@ #include #include +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define INDIRECT_CALL_MT(f, f2, f1, ...) \ + INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#endif + static const struct fib_kuid_range fib_kuid_range_unset = { KUIDT_INIT(0), KUIDT_INIT(~0), @@ -268,10 +275,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, uid_gt(fl->flowi_uid, rule->uid_range.end)) goto out; - ret = INDIRECT_CALL_INET(ops->match, - fib6_rule_match, - fib4_rule_match, - rule, fl, flags); + ret = INDIRECT_CALL_MT(ops->match, + fib6_rule_match, + fib4_rule_match, + rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; } @@ -302,15 +309,15 @@ jumped: } else if (rule->action == FR_ACT_NOP) continue; else - err = INDIRECT_CALL_INET(ops->action, - fib6_rule_action, - fib4_rule_action, - rule, fl, flags, arg); - - if (!err && ops->suppress && INDIRECT_CALL_INET(ops->suppress, - fib6_rule_suppress, - fib4_rule_suppress, - rule, arg)) + err = INDIRECT_CALL_MT(ops->action, + fib6_rule_action, + fib4_rule_action, + rule, fl, flags, arg); + + if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, + fib6_rule_suppress, + fib4_rule_suppress, + rule, arg)) continue; if (err != -EAGAIN) { -- cgit From 6540351e6f27ef718e3cf5b46349633f3ec57859 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 23 Jul 2020 18:08:56 +0530 Subject: Bluetooth: Translate additional address type correctly When using controller based address resolution, then the new address types 0x02 and 0x03 are used. These types need to be converted back into either public address or random address types. Signed-off-by: Marcel Holtmann Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4ba23b821cbf..3f89bd639860 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3292,6 +3292,15 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, { struct hci_conn_params *param; + switch (addr_type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + addr_type = ADDR_LE_DEV_PUBLIC; + break; + case ADDR_LE_DEV_RANDOM_RESOLVED: + addr_type = ADDR_LE_DEV_RANDOM; + break; + } + list_for_each_entry(param, list, action) { if (bacmp(¶m->addr, addr) == 0 && param->addr_type == addr_type) -- cgit From e1d572357599d142df5764b39731b6eb55a22beb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 23 Jul 2020 18:08:57 +0530 Subject: Bluetooth: Configure controller address resolution if available When the LL Privacy support is available, then as part of enabling or disabling passive background scanning, it is required to set up the controller based address resolution as well. Since only passive background scanning is utilizing the whitelist, the address resolution is now bound to the whitelist and passive background scanning. All other resolution can be easily done by the host stack. Signed-off-by: Marcel Holtmann Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7c0c2fda04ad..7d0ba53ffed0 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -675,6 +675,12 @@ void hci_req_add_le_scan_disable(struct hci_request *req) cp.enable = LE_SCAN_DISABLE; hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } + + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { + __u8 enable = 0x00; + hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); + } } static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, @@ -816,7 +822,8 @@ static bool scan_use_rpa(struct hci_dev *hdev) } static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, - u16 window, u8 own_addr_type, u8 filter_policy) + u16 window, u8 own_addr_type, u8 filter_policy, + bool addr_resolv) { struct hci_dev *hdev = req->hdev; @@ -825,6 +832,11 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, return; } + if (use_ll_privacy(hdev) && addr_resolv) { + u8 enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); + } + /* Use ext scanning if set ext scan param and ext scan enable is * supported */ @@ -898,12 +910,18 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, } } +/* Ensure to call hci_req_add_le_scan_disable() first to disable the + * controller based address resolution to be able to reconfigure + * resolving list. + */ void hci_req_add_le_passive_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 own_addr_type; u8 filter_policy; u16 window, interval; + /* Background scanning should run with address resolution */ + bool addr_resolv = true; if (hdev->scanning_paused) { bt_dev_dbg(hdev, "Scanning is paused for suspend"); @@ -949,7 +967,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req) bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy); hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, - own_addr_type, filter_policy); + own_addr_type, filter_policy, addr_resolv); } static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance) @@ -2789,6 +2807,8 @@ static int active_scan(struct hci_request *req, unsigned long opt) u8 own_addr_type; /* White list is not used for discovery */ u8 filter_policy = 0x00; + /* Discovery doesn't require controller address resolution */ + bool addr_resolv = false; int err; BT_DBG("%s", hdev->name); @@ -2811,7 +2831,7 @@ static int active_scan(struct hci_request *req, unsigned long opt) hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, hdev->le_scan_window_discovery, own_addr_type, - filter_policy); + filter_policy, addr_resolv); return 0; } -- cgit From 0eee35bdfa3b472cc986ecc6ad76293fdcda59e2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 23 Jul 2020 18:08:58 +0530 Subject: Bluetooth: Update resolving list when updating whitelist When the whitelist is updated, then also update the entries of the resolving list for devices where IRKs are available. Signed-off-by: Marcel Holtmann Signed-off-by: Sathish Narsimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7d0ba53ffed0..85de1f356610 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -694,6 +694,21 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr, bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp); + + if (use_ll_privacy(req->hdev)) { + struct smp_irk *irk; + + irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); + if (irk) { + struct hci_cp_le_del_from_resolv_list cp; + + cp.bdaddr_type = bdaddr_type; + bacpy(&cp.bdaddr, bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST, + sizeof(cp), &cp); + } + } } /* Adds connection to white list if needed. On error, returns -1. */ @@ -714,7 +729,7 @@ static int add_to_white_list(struct hci_request *req, return -1; /* White list can not be used with RPAs */ - if (!allow_rpa && + if (!allow_rpa && !use_ll_privacy(hdev) && hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { return -1; } @@ -732,6 +747,28 @@ static int add_to_white_list(struct hci_request *req, cp.bdaddr_type); hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); + if (use_ll_privacy(hdev)) { + struct smp_irk *irk; + + irk = hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type); + if (irk) { + struct hci_cp_le_add_to_resolv_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, ¶ms->addr); + memcpy(cp.peer_irk, irk->val, 16); + + if (hci_dev_test_flag(hdev, HCI_PRIVACY)) + memcpy(cp.local_irk, hdev->irk, 16); + else + memset(cp.local_irk, 0, 16); + + hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST, + sizeof(cp), &cp); + } + } + return 0; } @@ -772,7 +809,7 @@ static u8 update_white_list(struct hci_request *req) } /* White list can not be used with RPAs */ - if (!allow_rpa && + if (!allow_rpa && !use_ll_privacy(hdev) && hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { return 0x00; } -- cgit From b31bc00bfe3a4881e48e196b93cec1efb491ef2b Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:08:59 +0530 Subject: Bluetooth: Translate additional address type during le_conn When using controller based address resolution, then the new address types 0x02 and 0x03 are used. These types need to be converted back into either public address or random address types. This patch is specially during LE_CREATE_CONN if using own_add_type as 0x02 or 0x03. Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 61f8c4d12028..6388fb55b4d2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2296,6 +2296,22 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr, if (!conn) return; + /* When using controller based address resolution, then the new + * address types 0x02 and 0x03 are used. These types need to be + * converted back into either public address or random address type + */ + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { + switch (own_address_type) { + case ADDR_LE_DEV_PUBLIC_RESOLVED: + own_address_type = ADDR_LE_DEV_PUBLIC; + break; + case ADDR_LE_DEV_RANDOM_RESOLVED: + own_address_type = ADDR_LE_DEV_RANDOM; + break; + } + } + /* Store the initiator and responder address information which * is needed for SMP. These values will not change during the * lifetime of the connection. -- cgit From d03c759e391901ed8584117abd52ca4381a652c9 Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:00 +0530 Subject: Bluetooth: Let controller creates RPA during le create conn When address resolution is enabled and set_privacy is enabled let's use own address type as 0x03 Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 85de1f356610..e48f0945a417 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2242,7 +2242,13 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, if (use_rpa) { int to; - *own_addr_type = ADDR_LE_DEV_RANDOM; + /* If Controller supports LL Privacy use own address type is + * 0x03 + */ + if (use_ll_privacy(hdev)) + *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; + else + *own_addr_type = ADDR_LE_DEV_RANDOM; if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && !bacmp(&hdev->random_addr, &hdev->rpa)) -- cgit From 5c49bcce5c124406920843af65574104aaaa3309 Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:01 +0530 Subject: Bluetooth: Enable/Disable address resolution during le create conn In this patch if le_create_conn process is started restrict to disable address resolution and same is disabled during le_enh_connection_complete Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 7 ++++++- net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/hci_request.c | 45 ++++++++++++++++++++++++++++++++++----------- net/bluetooth/hci_request.h | 3 ++- net/bluetooth/mgmt.c | 2 +- 5 files changed, 47 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index be67361ff2f0..9832f8445d43 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1003,6 +1003,11 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, struct hci_request req; int err; + /* This ensures that during disable le_scan address resolution + * will not be disabled if it is followed by le_create_conn + */ + bool rpa_le_conn = true; + /* Let's make sure that le is enabled.*/ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) { if (lmp_le_capable(hdev)) @@ -1103,7 +1108,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * state. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(&req); + hci_req_add_le_scan_disable(&req, rpa_le_conn); hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6388fb55b4d2..628831b15c0a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5228,6 +5228,10 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); + + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + hci_req_disable_address_resolution(hdev); } static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e48f0945a417..70e077cc7dfa 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -428,7 +428,7 @@ static void __hci_update_background_scan(struct hci_request *req) if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) return; - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); BT_DBG("%s stopping background scanning", hdev->name); } else { @@ -447,7 +447,7 @@ static void __hci_update_background_scan(struct hci_request *req) * don't miss any advertising (due to duplicates filter). */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); hci_req_add_le_passive_scan(req); @@ -652,7 +652,7 @@ void __hci_req_update_eir(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); } -void hci_req_add_le_scan_disable(struct hci_request *req) +void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) { struct hci_dev *hdev = req->hdev; @@ -676,8 +676,9 @@ void hci_req_add_le_scan_disable(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); } + /* Disable address resolution */ if (use_ll_privacy(hdev) && - hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) { + hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { __u8 enable = 0x00; hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); } @@ -1072,7 +1073,7 @@ static void hci_req_config_le_suspend_scan(struct hci_request *req) { /* Before changing params disable scan if enabled */ if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); /* Configure params and enable scanning */ hci_req_add_le_passive_scan(req); @@ -1140,7 +1141,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Disable LE passive scan if enabled */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(&req); + hci_req_add_le_scan_disable(&req, false); /* Mark task needing completion */ set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); @@ -1696,6 +1697,28 @@ int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance) return hci_req_run(&req, NULL); } +static void enable_addr_resolution_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + BT_DBG("%s status %u", hdev->name, status); +} + +void hci_req_disable_address_resolution(struct hci_dev *hdev) +{ + struct hci_request req; + __u8 enable = 0x00; + + if (!use_ll_privacy(hdev) && + !hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) + return; + + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); + + hci_req_run(&req, enable_addr_resolution_complete); +} + static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); @@ -2667,7 +2690,7 @@ static void bg_scan_update(struct work_struct *work) static int le_scan_disable(struct hci_request *req, unsigned long opt) { - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); return 0; } @@ -2770,7 +2793,7 @@ static int le_scan_restart(struct hci_request *req, unsigned long opt) return 0; } - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); if (use_ext_scan(hdev)) { struct hci_cp_le_set_ext_scan_enable ext_enable_cp; @@ -2861,7 +2884,7 @@ static int active_scan(struct hci_request *req, unsigned long opt) * discovery scanning parameters. */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); /* All active scans will be done with either a resolvable private * address (when privacy feature has been enabled) or non-resolvable @@ -2976,14 +2999,14 @@ bool hci_req_stop_discovery(struct hci_request *req) if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { cancel_delayed_work(&hdev->le_scan_disable); - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); } ret = true; } else { /* Passive scanning */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req); + hci_req_add_le_scan_disable(req, false); ret = true; } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index bbe892ab078a..6a12e84c66c4 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -65,11 +65,12 @@ void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); -void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); +void hci_req_disable_address_resolution(struct hci_dev *hdev); void hci_req_reenable_advertising(struct hci_dev *hdev); void __hci_req_enable_advertising(struct hci_request *req); void __hci_req_disable_advertising(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f45105d2de77..47bcfe2fb14c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5226,7 +5226,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - hci_req_add_le_scan_disable(&req); + hci_req_add_le_scan_disable(&req, false); hci_req_add_le_passive_scan(&req); hci_req_run(&req, NULL); -- cgit From b2cc23398e8166b38f8715026273503b081c2a7a Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:02 +0530 Subject: Bluetooth: Enable RPA Timeout Enable RPA timeout during bluetooth initialization. The RPA timeout value is used from hdev, which initialized from debug_fs Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3f89bd639860..68bfe57b6625 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -763,6 +763,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL); } + if (hdev->commands[35] & 0x40) { + __le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout); + + /* Set RPA timeout */ + hci_req_add(req, HCI_OP_LE_SET_RPA_TIMEOUT, 2, + &rpa_timeout); + } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { /* Read LE Maximum Data Length */ hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); -- cgit From cbbdfa6f331980c6786b4ca5df53c37b90df3246 Mon Sep 17 00:00:00 2001 From: Sathish Narasimman Date: Thu, 23 Jul 2020 18:09:03 +0530 Subject: Bluetooth: Enable controller RPA resolution using Experimental feature This patch adds support to enable the use of RPA Address resolution using expermental feature mgmt command. Signed-off-by: Sathish Narasimman Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 1 + net/bluetooth/hci_request.c | 7 ++- net/bluetooth/mgmt.c | 142 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 147 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 628831b15c0a..33d8458fdd4a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5230,6 +5230,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, le16_to_cpu(ev->supervision_timeout)); if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) hci_req_disable_address_resolution(hdev); } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 70e077cc7dfa..435400a43a78 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -678,8 +678,10 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) /* Disable address resolution */ if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { __u8 enable = 0x00; + hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); } } @@ -870,8 +872,11 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, return; } - if (use_ll_privacy(hdev) && addr_resolv) { + if (use_ll_privacy(hdev) && + hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && + addr_resolv) { u8 enable = 0x01; + hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 47bcfe2fb14c..4ec0fee80344 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -795,10 +795,15 @@ static u32 get_supported_settings(struct hci_dev *hdev) if (lmp_le_capable(hdev)) { settings |= MGMT_SETTING_LE; - settings |= MGMT_SETTING_ADVERTISING; settings |= MGMT_SETTING_SECURE_CONN; settings |= MGMT_SETTING_PRIVACY; settings |= MGMT_SETTING_STATIC_ADDRESS; + + /* When the experimental feature for LL Privacy support is + * enabled, then advertising is no longer supported. + */ + if (!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + settings |= MGMT_SETTING_ADVERTISING; } if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) || @@ -3759,10 +3764,16 @@ static const u8 simult_central_periph_uuid[16] = { 0x96, 0x46, 0xc0, 0x42, 0xb5, 0x10, 0x1b, 0x67, }; +/* 15c0a148-c273-11ea-b3de-0242ac130004 */ +static const u8 rpa_resolution_uuid[16] = { + 0x04, 0x00, 0x13, 0xac, 0x42, 0x02, 0xde, 0xb3, + 0xea, 0x11, 0x73, 0xc2, 0x48, 0xa1, 0xc0, 0x15, +}; + static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[44]; + char buf[62]; /* Enough space for 3 features */ struct mgmt_rp_read_exp_features_info *rp = (void *)buf; u16 idx = 0; u32 flags; @@ -3795,6 +3806,17 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } + if (hdev && use_ll_privacy(hdev)) { + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + flags = BIT(0) | BIT(1); + else + flags = BIT(1); + + memcpy(rp->features[idx].uuid, rpa_resolution_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -3807,6 +3829,21 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, 0, rp, sizeof(*rp) + (20 * idx)); } +static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev, + struct sock *skip) +{ + struct mgmt_ev_exp_feature_changed ev; + + memset(&ev, 0, sizeof(ev)); + memcpy(ev.uuid, rpa_resolution_uuid, 16); + ev.flags = cpu_to_le32((enabled ? BIT(0) : 0) | BIT(1)); + + return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, hdev, + &ev, sizeof(ev), + HCI_MGMT_EXP_FEATURE_EVENTS, skip); + +} + #ifdef CONFIG_BT_FEATURE_DEBUG static int exp_debug_feature_changed(bool enabled, struct sock *skip) { @@ -3845,6 +3882,16 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, } #endif + if (hdev && use_ll_privacy(hdev) && !hdev_is_powered(hdev)) { + bool changed = hci_dev_test_flag(hdev, + HCI_ENABLE_LL_PRIVACY); + + hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); + + if (changed) + exp_ll_privacy_feature_changed(false, hdev, sk); + } + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, @@ -3895,6 +3942,69 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, } #endif + if (!memcmp(cp->uuid, rpa_resolution_uuid, 16)) { + bool val, changed; + int err; + u32 flags; + + /* Command requires to use the controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Changes can only be made when controller is powered down */ + if (hdev_is_powered(hdev)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_NOT_POWERED); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = !!cp->param[0]; + + if (val) { + changed = !hci_dev_test_flag(hdev, + HCI_ENABLE_LL_PRIVACY); + hci_dev_set_flag(hdev, HCI_ENABLE_LL_PRIVACY); + hci_dev_clear_flag(hdev, HCI_ADVERTISING); + + /* Enable LL privacy + supported settings changed */ + flags = BIT(0) | BIT(1); + } else { + changed = hci_dev_test_flag(hdev, + HCI_ENABLE_LL_PRIVACY); + hci_dev_clear_flag(hdev, HCI_ENABLE_LL_PRIVACY); + + /* Disable LL privacy + supported settings changed */ + flags = BIT(1); + } + + memcpy(rp.uuid, rpa_resolution_uuid, 16); + rp.flags = cpu_to_le32(flags); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_ll_privacy_feature_changed(val, hdev, sk); + + return err; + } + return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE, MGMT_OP_SET_EXP_FEATURE, MGMT_STATUS_NOT_SUPPORTED); @@ -5040,6 +5150,13 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, status); + /* Enabling the experimental LL Privay support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -7112,6 +7229,13 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES, MGMT_STATUS_REJECTED); + /* Enabling the experimental LL Privay support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + hci_dev_lock(hdev); rp_len = sizeof(*rp) + hdev->adv_instance_cnt; @@ -7315,6 +7439,13 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, status); + /* Enabling the experimental LL Privay support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -7479,6 +7610,13 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, bt_dev_dbg(hdev, "sock %p", sk); + /* Enabling the experimental LL Privay support disables support for + * advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_NOT_SUPPORTED); + hci_dev_lock(hdev); if (cp->instance && !hci_find_adv_instance(hdev, cp->instance)) { -- cgit From f7c6cb1d9728dea9d9f131ef57303d6821afb0f8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jul 2020 17:31:03 -0700 Subject: bpf: Expose socket storage to BPF_PROG_TYPE_CGROUP_SOCK This lets us use socket storage from the following hooks: * BPF_CGROUP_INET_SOCK_CREATE * BPF_CGROUP_INET_SOCK_RELEASE * BPF_CGROUP_INET4_POST_BIND * BPF_CGROUP_INET6_POST_BIND Using existing 'bpf_sk_storage_get_proto' doesn't work because second argument is ARG_PTR_TO_SOCKET. Even though BPF_PROG_TYPE_CGROUP_SOCK hooks operate on 'struct bpf_sock', the verifier still considers it as a PTR_TO_CTX. That's why I'm adding another 'bpf_sk_storage_get_cg_sock_proto' definition strictly for BPF_PROG_TYPE_CGROUP_SOCK which accepts ARG_PTR_TO_CTX which is really 'struct sock' for this program type. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200729003104.1280813-1-sdf@google.com --- net/core/bpf_sk_storage.c | 10 ++++++++++ net/core/filter.c | 3 +++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index eafcd15e7dfd..d3377c90a291 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -944,6 +944,16 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { + .func = bpf_sk_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_sk_storage_delete_proto = { .func = bpf_sk_storage_delete, .gpl_only = false, diff --git a/net/core/filter.c b/net/core/filter.c index 29e3455122f7..7124f0fe6974 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6187,6 +6187,7 @@ bool bpf_helper_changes_pkt_data(void *func) } const struct bpf_func_proto bpf_event_output_data_proto __weak; +const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -6219,6 +6220,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_cg_sock_proto; default: return bpf_base_func_proto(func_id); } -- cgit From b2aecfe8e49059a69379a521aa68b4f55516780f Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:28 +0100 Subject: l2tp: don't export __l2tp_session_unhash When __l2tp_session_unhash was first added it was used outside of l2tp_core.c, but that's no longer the case. As such, there's no longer a need to export the function. Make it private inside l2tp_core.c, and relocate it to avoid having to declare the function prototype in l2tp_core.h. Since the function is no longer used outside l2tp_core.c, remove the "__" prefix since we don't need to indicate anything special about its expected use to callers. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 57 ++++++++++++++++++++++++---------------------------- net/l2tp/l2tp_core.h | 1 - 2 files changed, 26 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e723828e458b..7f4aef5a58ba 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1180,6 +1180,30 @@ end: return; } +/* Remove an l2tp session from l2tp_core's hash lists. */ +static void l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ + write_lock_bh(&tunnel->hlist_lock); + hlist_del_init(&session->hlist); + write_unlock_bh(&tunnel->hlist_lock); + + /* For L2TPv3 we have a per-net hash: remove from there, too */ + if (tunnel->version != L2TP_HDR_VER_2) { + struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_del_init_rcu(&session->global_hlist); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + synchronize_rcu(); + } + } +} + /* When the tunnel is closed, all the attached sessions need to go too. */ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) @@ -1209,7 +1233,7 @@ again: write_unlock_bh(&tunnel->hlist_lock); - __l2tp_session_unhash(session); + l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close) @@ -1574,35 +1598,6 @@ out: } EXPORT_SYMBOL_GPL(l2tp_session_free); -/* Remove an l2tp session from l2tp_core's hash lists. - * Provides a tidyup interface for pseudowire code which can't just route all - * shutdown via. l2tp_session_delete and a pseudowire-specific session_close - * callback. - */ -void __l2tp_session_unhash(struct l2tp_session *session) -{ - struct l2tp_tunnel *tunnel = session->tunnel; - - /* Remove the session from core hashes */ - if (tunnel) { - /* Remove from the per-tunnel hash */ - write_lock_bh(&tunnel->hlist_lock); - hlist_del_init(&session->hlist); - write_unlock_bh(&tunnel->hlist_lock); - - /* For L2TPv3 we have a per-net hash: remove from there, too */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } - } -} -EXPORT_SYMBOL_GPL(__l2tp_session_unhash); - /* This function is used by the netlink SESSION_DELETE command and by * pseudowire modules. */ @@ -1611,7 +1606,7 @@ int l2tp_session_delete(struct l2tp_session *session) if (test_and_set_bit(0, &session->dead)) return 0; - __l2tp_session_unhash(session); + l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close) (*session->session_close)(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 2d2dd219a176..f6dd74476d13 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -201,7 +201,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); -void __l2tp_session_unhash(struct l2tp_session *session); int l2tp_session_delete(struct l2tp_session *session); void l2tp_session_free(struct l2tp_session *session); void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, -- cgit From 52016e259bab98613453e29d42f2ffe456feee11 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:29 +0100 Subject: l2tp: don't export tunnel and session free functions Tunnel and session instances are reference counted, and shouldn't be directly freed by pseudowire code. Rather than exporting l2tp_tunnel_free and l2tp_session_free, make them private to l2tp_core.c, and export the refcount functions instead. In order to do this, the refcount functions cannot be declared as inline. Since the codepaths which take and drop tunnel and session references are not directly in the datapath this shouldn't cause performance issues. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 60 +++++++++++++++++++++++++++++++++++----------------- net/l2tp/l2tp_core.h | 33 +++++------------------------ 2 files changed, 46 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7f4aef5a58ba..f22fe34eb8fc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -149,12 +149,51 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } -void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) +static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { sock_put(tunnel->sock); /* the tunnel is freed in the socket destructor */ } -EXPORT_SYMBOL(l2tp_tunnel_free); + +static void l2tp_session_free(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; + + if (tunnel) { + if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC)) + goto out; + l2tp_tunnel_dec_refcount(tunnel); + } + +out: + kfree(session); +} + +void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) +{ + refcount_inc(&tunnel->ref_count); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount); + +void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) +{ + if (refcount_dec_and_test(&tunnel->ref_count)) + l2tp_tunnel_free(tunnel); +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount); + +void l2tp_session_inc_refcount(struct l2tp_session *session) +{ + refcount_inc(&session->ref_count); +} +EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount); + +void l2tp_session_dec_refcount(struct l2tp_session *session) +{ + if (refcount_dec_and_test(&session->ref_count)) + l2tp_session_free(session); +} +EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount); /* Lookup a tunnel. A new reference is held on the returned tunnel. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) @@ -1581,23 +1620,6 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -/* Really kill the session. - */ -void l2tp_session_free(struct l2tp_session *session) -{ - struct l2tp_tunnel *tunnel = session->tunnel; - - if (tunnel) { - if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC)) - goto out; - l2tp_tunnel_dec_refcount(tunnel); - } - -out: - kfree(session); -} -EXPORT_SYMBOL_GPL(l2tp_session_free); - /* This function is used by the netlink SESSION_DELETE command and by * pseudowire modules. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index f6dd74476d13..5c49e2762300 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -175,13 +175,16 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) return &session->priv[0]; } +void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel); +void l2tp_session_inc_refcount(struct l2tp_session *session); +void l2tp_session_dec_refcount(struct l2tp_session *session); + struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, u32 session_id); -void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); - struct l2tp_session *l2tp_session_get(const struct net *net, u32 session_id); struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, @@ -202,7 +205,6 @@ int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); int l2tp_session_delete(struct l2tp_session *session); -void l2tp_session_free(struct l2tp_session *session); void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length); @@ -217,31 +219,6 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); -static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) -{ - refcount_inc(&tunnel->ref_count); -} - -static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) -{ - if (refcount_dec_and_test(&tunnel->ref_count)) - l2tp_tunnel_free(tunnel); -} - -/* Session reference counts. Incremented when code obtains a reference - * to a session. - */ -static inline void l2tp_session_inc_refcount(struct l2tp_session *session) -{ - refcount_inc(&session->ref_count); -} - -static inline void l2tp_session_dec_refcount(struct l2tp_session *session) -{ - if (refcount_dec_and_test(&session->ref_count)) - l2tp_session_free(session); -} - static inline int l2tp_get_l2specific_len(struct l2tp_session *session) { switch (session->l2specific_type) { -- cgit From 628703f59dcc832a0bd04b4fa41d856e5df98112 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:30 +0100 Subject: l2tp: return void from l2tp_session_delete l2tp_session_delete is used to schedule a session instance for deletion. The function itself always returns zero, and none of its direct callers check its return value, so have the function return void. This change de-facto changes the l2tp netlink session_delete callback prototype since all pseudowires currently use l2tp_session_delete for their implementation of that operation. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 ++------- net/l2tp/l2tp_core.h | 4 ++-- net/l2tp/l2tp_netlink.c | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f22fe34eb8fc..690dcbc30472 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1620,13 +1620,10 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -/* This function is used by the netlink SESSION_DELETE command and by - * pseudowire modules. - */ -int l2tp_session_delete(struct l2tp_session *session) +void l2tp_session_delete(struct l2tp_session *session) { if (test_and_set_bit(0, &session->dead)) - return 0; + return; l2tp_session_unhash(session); l2tp_session_queue_purge(session); @@ -1634,8 +1631,6 @@ int l2tp_session_delete(struct l2tp_session *session) (*session->session_close)(session); l2tp_session_dec_refcount(session); - - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5c49e2762300..0c32981f0cd3 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -167,7 +167,7 @@ struct l2tp_nl_cmd_ops { int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); - int (*session_delete)(struct l2tp_session *session); + void (*session_delete)(struct l2tp_session *session); }; static inline void *l2tp_session_priv(struct l2tp_session *session) @@ -204,7 +204,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); -int l2tp_session_delete(struct l2tp_session *session); +void l2tp_session_delete(struct l2tp_session *session); void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 35716a6e1e2c..def78eebca4c 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -670,7 +670,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) - ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + l2tp_nl_cmd_ops[pw_type]->session_delete(session); l2tp_session_dec_refcount(session); -- cgit From 2dedab6ff57edd29848391fa917a8f96c4f82583 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:31 +0100 Subject: l2tp: remove build_header callback in struct l2tp_session The structure of an L2TP data packet header varies depending on the version of the L2TP protocol being used. struct l2tp_session used to have a build_header callback to abstract this difference away. It's clearer to simply choose the correct function to use when building the data packet (and we save on the function pointer in the session structure). This approach does mean dereferencing the parent tunnel structure in order to determine the tunnel version, but we're doing that in the transmit path in any case. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 10 ++++------ net/l2tp/l2tp_core.h | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 690dcbc30472..3992af139479 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1116,7 +1116,10 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len } /* Setup L2TP header */ - session->build_header(session, __skb_push(skb, hdr_len)); + if (tunnel->version == L2TP_HDR_VER_2) + l2tp_build_l2tpv2_header(session, __skb_push(skb, hdr_len)); + else + l2tp_build_l2tpv3_header(session, __skb_push(skb, hdr_len)); /* Reset skb netfilter state */ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -1700,11 +1703,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } - if (tunnel->version == L2TP_HDR_VER_2) - session->build_header = l2tp_build_l2tpv2_header; - else - session->build_header = l2tp_build_l2tpv3_header; - l2tp_session_set_header_len(session, tunnel->version); refcount_set(&session->ref_count, 1); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 0c32981f0cd3..3dfd3ddd28fd 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -101,7 +101,6 @@ struct l2tp_session { struct l2tp_stats stats; struct hlist_node global_hlist; /* global hash list node */ - int (*build_header)(struct l2tp_session *session, void *buf); void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); void (*show)(struct seq_file *m, void *priv); -- cgit From ca7885dbcd899e63e47c33707c1615d9dd281c7f Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:32 +0100 Subject: l2tp: tweak exports for l2tp_recv_common and l2tp_ioctl All of the l2tp subsystem's exported symbols are exported using EXPORT_SYMBOL_GPL, except for l2tp_recv_common and l2tp_ioctl. These functions alone are not useful without the rest of the l2tp infrastructure, so there's no practical benefit to these symbols using a different export policy. Change these exports to use EXPORT_SYMBOL_GPL for consistency with the rest of l2tp. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3992af139479..701fc72ad9f4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -808,7 +808,7 @@ discard: atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } -EXPORT_SYMBOL(l2tp_recv_common); +EXPORT_SYMBOL_GPL(l2tp_recv_common); /* Drop skbs from the session's reorder_q */ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index a159cb2bf0f4..df2a35b5714a 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -597,7 +597,7 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(amount, (int __user *)arg); } -EXPORT_SYMBOL(l2tp_ioctl); +EXPORT_SYMBOL_GPL(l2tp_ioctl); static struct proto l2tp_ip_prot = { .name = "L2TP/IP", -- cgit From 340bb1ac450ba21cc4dabd368791fa49b39c858a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 28 Jul 2020 18:20:33 +0100 Subject: l2tp: improve API documentation in l2tp_core.h * Improve the description of the key l2tp subsystem data structures. * Add high-level description of the main APIs for interacting with l2tp core. * Add documentation for the l2tp netlink session command callbacks. * Document the session pseudowire callbacks. Signed-off-by: Tom Parkin Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.h | 86 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3dfd3ddd28fd..3468d6b177a0 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -15,15 +15,15 @@ #include #endif -/* Just some random numbers */ +/* Random numbers used for internal consistency checks of tunnel and session structures */ #define L2TP_TUNNEL_MAGIC 0x42114DDA #define L2TP_SESSION_MAGIC 0x0C04EB7D -/* Per tunnel, session hash table size */ +/* Per tunnel session hash table size */ #define L2TP_HASH_BITS 4 #define L2TP_HASH_SIZE BIT(L2TP_HASH_BITS) -/* System-wide, session hash table size */ +/* System-wide session hash table size */ #define L2TP_HASH_BITS_2 8 #define L2TP_HASH_SIZE_2 BIT(L2TP_HASH_BITS_2) @@ -43,9 +43,7 @@ struct l2tp_stats { struct l2tp_tunnel; -/* Describes a session. Contains information to determine incoming - * packets and transmit outgoing ones. - */ +/* L2TP session configuration */ struct l2tp_session_cfg { enum l2tp_pwtype pw_type; unsigned int recv_seq:1; /* expect receive packets with sequence numbers? */ @@ -63,6 +61,11 @@ struct l2tp_session_cfg { char *ifname; }; +/* Represents a session (pseudowire) instance. + * Tracks runtime state including cookies, dataplane packet sequencing, and IO statistics. + * Is linked into a per-tunnel session hashlist; and in the case of an L2TPv3 session into + * an additional per-net ("global") hashlist. + */ struct l2tp_session { int magic; /* should be L2TP_SESSION_MAGIC */ long dead; @@ -101,15 +104,32 @@ struct l2tp_session { struct l2tp_stats stats; struct hlist_node global_hlist; /* global hash list node */ + /* Session receive handler for data packets. + * Each pseudowire implementation should implement this callback in order to + * handle incoming packets. Packets are passed to the pseudowire handler after + * reordering, if data sequence numbers are enabled for the session. + */ void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); + + /* Session close handler. + * Each pseudowire implementation may implement this callback in order to carry + * out pseudowire-specific shutdown actions. + * The callback is called by core after unhashing the session and purging its + * reorder queue. + */ void (*session_close)(struct l2tp_session *session); + + /* Session show handler. + * Pseudowire-specific implementation of debugfs session rendering. + * The callback is called by l2tp_debugfs.c after rendering core session + * information. + */ void (*show)(struct seq_file *m, void *priv); + u8 priv[]; /* private data */ }; -/* Describes the tunnel. It contains info to track all the associated - * sessions so incoming packets can be sorted out - */ +/* L2TP tunnel configuration */ struct l2tp_tunnel_cfg { int debug; /* bitmask of debug message categories */ enum l2tp_encap_type encap; @@ -128,6 +148,12 @@ struct l2tp_tunnel_cfg { udp6_zero_rx_checksums:1; }; +/* Represents a tunnel instance. + * Tracks runtime state including IO statistics. + * Holds the tunnel socket (either passed from userspace or directly created by the kernel). + * Maintains a hashlist of sessions belonging to the tunnel instance. + * Is linked into a per-net list of tunnels. + */ struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ @@ -162,10 +188,23 @@ struct l2tp_tunnel { struct work_struct del_work; }; +/* Pseudowire ops callbacks for use with the l2tp genetlink interface */ struct l2tp_nl_cmd_ops { + /* The pseudowire session create callback is responsible for creating a session + * instance for a specific pseudowire type. + * It must call l2tp_session_create and l2tp_session_register to register the + * session instance, as well as carry out any pseudowire-specific initialisation. + * It must return >= 0 on success, or an appropriate negative errno value on failure. + */ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); + + /* The pseudowire session delete callback is responsible for initiating the deletion + * of a session instance. + * It must call l2tp_session_delete, as well as carry out any pseudowire-specific + * teardown actions. + */ void (*session_delete)(struct l2tp_session *session); }; @@ -174,11 +213,16 @@ static inline void *l2tp_session_priv(struct l2tp_session *session) return &session->priv[0]; } +/* Tunnel and session refcounts */ void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel); void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel); void l2tp_session_inc_refcount(struct l2tp_session *session); void l2tp_session_dec_refcount(struct l2tp_session *session); +/* Tunnel and session lookup. + * These functions take a reference on the instances they return, so + * the caller must ensure that the reference is dropped appropriately. + */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth); struct l2tp_session *l2tp_tunnel_get_session(struct l2tp_tunnel *tunnel, @@ -189,33 +233,47 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname); +/* Tunnel and session lifetime management. + * Creation of a new instance is a two-step process: create, then register. + * Destruction is triggered using the *_delete functions, and completes asynchronously. + */ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg); - void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); + struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel); - void l2tp_session_delete(struct l2tp_session *session); + +/* Receive path helpers. If data sequencing is enabled for the session these + * functions handle queuing and reordering prior to passing packets to the + * pseudowire code to be passed to userspace. + */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); -void l2tp_session_set_header_len(struct l2tp_session *session, int version); +/* Transmit path helpers for sending packets over the tunnel socket. */ +void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); -int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, - const struct l2tp_nl_cmd_ops *ops); +/* Pseudowire management. + * Pseudowires should register with l2tp core on module init, and unregister + * on module exit. + */ +int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops); void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); + +/* IOCTL helper for IP encap modules. */ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); static inline int l2tp_get_l2specific_len(struct l2tp_session *session) -- cgit From c64c9c282a9a7ec0515b725d5aaed68c32e403a4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 26 Jul 2020 14:02:28 +0200 Subject: udp, bpf: Ignore connections in reuseport group after BPF sk lookup When BPF sk lookup invokes reuseport handling for the selected socket, it should ignore the fact that reuseport group can contain connected UDP sockets. With BPF sk lookup this is not relevant as we are not scoring sockets to find the best match, which might be a connected UDP socket. Fix it by unconditionally accepting the socket selected by reuseport. This fixes the following two failures reported by test_progs. # ./test_progs -t sk_lookup ... #73/14 UDP IPv4 redir and reuseport with conns:FAIL ... #73/20 UDP IPv6 redir and reuseport with conns:FAIL ... Fixes: a57066b1a019 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Reported-by: Alexei Starovoitov Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200726120228.1414348-1-jakub@cloudflare.com --- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7ce31beccfc2..e88efba07551 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -473,7 +473,7 @@ static struct sock *udp4_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c394e674f486..29d9691359b9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -208,7 +208,7 @@ static inline struct sock *udp6_lookup_run_bpf(struct net *net, return sk; reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum); - if (reuse_sk && !reuseport_has_conns(sk, false)) + if (reuse_sk) sk = reuse_sk; return sk; } -- cgit From a0d716d8e42abcb973afed27c6fa44bd146d2616 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 29 Jul 2020 22:17:00 -0500 Subject: net/sched: act_pedit: Use flex_array_size() helper in memcpy() Make use of the flex_array_size() helper to calculate the size of a flexible array member within an enclosing structure. This helper offers defense-in-depth against potential integer overflows, while at the same time makes it explicitly clear that we are dealing with a flexible array member. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 66986db062ed..c158bfed86d5 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -436,8 +436,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, return -ENOBUFS; spin_lock_bh(&p->tcf_lock); - memcpy(opt->keys, p->tcfp_keys, - p->tcfp_nkeys * sizeof(struct tc_pedit_key)); + memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys)); opt->index = p->tcf_index; opt->nkeys = p->tcfp_nkeys; opt->flags = p->tcfp_flags; -- cgit From 9a9373ffc7338377c3837ce0ccd40f5c4402c9d1 Mon Sep 17 00:00:00 2001 From: Alain Michaud Date: Fri, 31 Jul 2020 01:05:34 +0000 Subject: Bluetooth: use the proper scan params when conn is pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an LE connection is requested and an RPA update is needed via hci_connect_le_scan, the default scanning parameters are used rather than the connect parameters. This leads to significant delays in the connection establishment process when using lower duty cycle scanning parameters. The patch simply looks at the pended connection list when trying to determine which scanning parameters should be used. Before: < HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 8                             #378 [hci0] 1659.247156         Own address type: Public (0x00)         Filter policy: Ignore not in white list (0x01)         PHYs: 0x01         Entry 0: LE 1M           Type: Passive (0x00)           Interval: 367.500 msec (0x024c)           Window: 37.500 msec (0x003c) After: < HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 8                               #39 [hci0] 7.422109         Own address type: Public (0x00)         Filter policy: Ignore not in white list (0x01)         PHYs: 0x01         Entry 0: LE 1M           Type: Passive (0x00)           Interval: 60.000 msec (0x0060)           Window: 60.000 msec (0x0060) Signed-off-by: Alain Michaud Reviewed-by: Abhishek Pandit-Subedi Reviewed-by: Yu Liu Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 435400a43a78..e0269192f2e5 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -953,6 +953,27 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, } } +/* Returns true if an le connection is in the scanning state */ +static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c->type == LE_LINK && c->state == BT_CONNECT && + test_bit(HCI_CONN_SCANNING, &c->flags)) { + rcu_read_unlock(); + return true; + } + } + + rcu_read_unlock(); + + return false; +} + /* Ensure to call hci_req_add_le_scan_disable() first to disable the * controller based address resolution to be able to reconfigure * resolving list. @@ -1003,6 +1024,9 @@ void hci_req_add_le_passive_scan(struct hci_request *req) if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; + } else if (hci_is_le_conn_scanning(hdev)) { + window = hdev->le_scan_window_connect; + interval = hdev->le_scan_int_connect; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; -- cgit From df78a0c0b67de58934877aad61e0431a2bd0caf1 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Mon, 1 Jun 2020 23:22:47 -0700 Subject: nl80211: S1G band and channel definitions Gives drivers the definitions needed to advertise support for S1G bands. Signed-off-by: Thomas Pedersen Link: https://lore.kernel.org/r/20200602062247.23212-1-thomas@adapt-ip.com Link: https://lore.kernel.org/r/20200731055636.795173-1-thomas@adapt-ip.com Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 7 ++++++- net/mac80211/scan.c | 1 + net/mac80211/tx.c | 1 + net/mac80211/util.c | 5 +++++ net/wireless/chan.c | 35 +++++++++++++++++++++++++++++++++++ net/wireless/core.c | 5 +++-- net/wireless/util.c | 8 ++++++++ 7 files changed, 59 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e6e192f53e4e..08cf9da9c1e3 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -313,9 +313,14 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, lockdep_assert_held(&local->chanctx_mtx); - /* don't optimize 5MHz, 10MHz, and radar_enabled confs */ + /* don't optimize non-20MHz based and radar_enabled confs */ if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 || ctx->conf.def.width == NL80211_CHAN_WIDTH_10 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_1 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_2 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_4 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_8 || + ctx->conf.def.width == NL80211_CHAN_WIDTH_16 || ctx->conf.radar_enabled) { ctx->conf.min_def = ctx->conf.def; return; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index ad90bbe57457..8003be6dae8a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -913,6 +913,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, case NL80211_BSS_CHAN_WIDTH_10: local->scan_chandef.width = NL80211_CHAN_WIDTH_10; break; + default: case NL80211_BSS_CHAN_WIDTH_20: /* If scanning on oper channel, use whatever channel-type * is currently in use. diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1a2941e5244f..ee30ef441f4a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -166,6 +166,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; + case NL80211_BAND_S1GHZ: case NL80211_BAND_60GHZ: /* TODO, for now fall through */ case NUM_NL80211_BANDS: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 21c94094a699..64a83ecd0a73 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3730,6 +3730,11 @@ u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c) c->width = NL80211_CHAN_WIDTH_20_NOHT; ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; break; + case NL80211_CHAN_WIDTH_1: + case NL80211_CHAN_WIDTH_2: + case NL80211_CHAN_WIDTH_4: + case NL80211_CHAN_WIDTH_8: + case NL80211_CHAN_WIDTH_16: case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: WARN_ON_ONCE(1); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index cddf92c5d09e..90f0f82cd9ca 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -153,6 +153,11 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_1: + case NL80211_CHAN_WIDTH_2: + case NL80211_CHAN_WIDTH_4: + case NL80211_CHAN_WIDTH_8: + case NL80211_CHAN_WIDTH_16: case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: @@ -263,6 +268,21 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) int width; switch (c->width) { + case NL80211_CHAN_WIDTH_1: + width = 1; + break; + case NL80211_CHAN_WIDTH_2: + width = 2; + break; + case NL80211_CHAN_WIDTH_4: + width = 4; + break; + case NL80211_CHAN_WIDTH_8: + width = 8; + break; + case NL80211_CHAN_WIDTH_16: + width = 16; + break; case NL80211_CHAN_WIDTH_5: width = 5; break; @@ -911,6 +931,21 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_1: + width = 1; + break; + case NL80211_CHAN_WIDTH_2: + width = 2; + break; + case NL80211_CHAN_WIDTH_4: + width = 4; + break; + case NL80211_CHAN_WIDTH_8: + width = 8; + break; + case NL80211_CHAN_WIDTH_16: + width = 16; + break; case NL80211_CHAN_WIDTH_5: width = 5; break; diff --git a/net/wireless/core.c b/net/wireless/core.c index c623d9bf5096..1971d7e6eb55 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -803,10 +803,11 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON(!sband->n_channels)) return -EINVAL; /* - * on 60GHz band, there are no legacy rates, so + * on 60GHz or sub-1Ghz band, there are no legacy rates, so * n_bitrates is 0 */ - if (WARN_ON(band != NL80211_BAND_60GHZ && + if (WARN_ON((band != NL80211_BAND_60GHZ && + band != NL80211_BAND_S1GHZ) && !sband->n_bitrates)) return -EINVAL; diff --git a/net/wireless/util.c b/net/wireless/util.c index 4d3b76f94f55..26a977343c3b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -102,6 +102,8 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) if (chan < 7) return MHZ_TO_KHZ(56160 + chan * 2160); break; + case NL80211_BAND_S1GHZ: + return 902000 + chan * 500; default: ; } @@ -210,6 +212,12 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) WARN_ON(!sband->ht_cap.ht_supported); WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e); break; + case NL80211_BAND_S1GHZ: + /* Figure 9-589bd: 3 means unsupported, so != 3 means at least + * mandatory is ok. + */ + WARN_ON((sband->s1g_cap.nss_mcs[0] & 0x3) == 0x3); + break; case NUM_NL80211_BANDS: default: WARN_ON(1); -- cgit From f2a0c18759072dbd5135f72a8035f6fb838df425 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 22 Jul 2020 16:38:30 +0100 Subject: mac80211: remove the need for variable rates_idx Currently rates_idx is being initialized with the value -1 and this value is never read so the initialization is redundant and can be removed. The next time the variable is used it is assigned a value that is returned a few statements later. Just return i - 1 and remove the need for rates_idx. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200722153830.959010-1-colin.king@canonical.com Signed-off-by: Johannes Berg --- net/mac80211/status.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/status.c b/net/mac80211/status.c index cbc40b358ba2..adb1d30ce06e 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -799,7 +799,6 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, struct ieee80211_tx_info *info, int *retry_count) { - int rates_idx = -1; int count = -1; int i; @@ -821,13 +820,12 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, count += info->status.rates[i].count; } - rates_idx = i - 1; if (count < 0) count = 0; *retry_count = count; - return rates_idx; + return i - 1; } void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, -- cgit From 9c167b2ddc1a89e6f6378e65bfd20e8c0ddf65f1 Mon Sep 17 00:00:00 2001 From: Julian Squires Date: Mon, 20 Jul 2020 12:20:35 -0230 Subject: cfg80211: allow vendor dumpit to terminate by returning 0 nl80211 vendor netlink dumpit, like netlink_callback->dump, should signal successful completion by returning 0. Currently, that will just cause dumpit to be called again, possibly many times until an error occurs. Since skb->len is never going to be 0 by the time dumpit is called, the only way for dumpit to signal completion is by returning an error. If it returns a positive value, the current message is cancelled, but that positive value is returned and nl80211_vendor_cmd_dump gets called again. Fix that by passing a return value of 0 through. Signed-off-by: Julian Squires Link: https://lore.kernel.org/r/20200720145033.401307-1-julian@cipht.net [reword commit message] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0e07fb8585fb..e9cf08a1a8b9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13479,7 +13479,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (err == -ENOBUFS || err == -ENOENT) { genlmsg_cancel(skb, hdr); break; - } else if (err) { + } else if (err <= 0) { genlmsg_cancel(skb, hdr); goto out; } -- cgit From 8328685682965cbf9348f6f85cadc8c0598771d9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 Jul 2020 19:35:39 +0200 Subject: nl80211: Remove a misleading label in 'nl80211_trigger_scan()' Since commit 5fe231e87372 ("cfg80211: vastly simplify locking"), the 'unlock' label at the end of 'nl80211_trigger_scan()' is useless and misleading, because nothing is unlocked there. Direct return can be used instead of 'err = -; goto unlock;' construction. Remove this label and simplify code accordingly. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20200712173539.274395-1-christophe.jaillet@wanadoo.fr Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e9cf08a1a8b9..96b94a900625 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7774,10 +7774,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - if (rdev->scan_req || rdev->scan_msg) { - err = -EBUSY; - goto unlock; - } + if (rdev->scan_req || rdev->scan_msg) + return -EBUSY; if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) { if (!wiphy_ext_feature_isset(wiphy, @@ -7790,10 +7788,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (scan_freqs) { n_channels = validate_scan_freqs(scan_freqs); - if (!n_channels) { - err = -EINVAL; - goto unlock; - } + if (!n_channels) + return -EINVAL; } else { n_channels = ieee80211_get_num_supported_channels(wiphy); } @@ -7802,29 +7798,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; - if (n_ssids > wiphy->max_scan_ssids) { - err = -EINVAL; - goto unlock; - } + if (n_ssids > wiphy->max_scan_ssids) + return -EINVAL; if (info->attrs[NL80211_ATTR_IE]) ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; - if (ie_len > wiphy->max_scan_ie_len) { - err = -EINVAL; - goto unlock; - } + if (ie_len > wiphy->max_scan_ie_len) + return -EINVAL; request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); - if (!request) { - err = -ENOMEM; - goto unlock; - } + if (!request) + return -ENOMEM; if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; @@ -8013,7 +8003,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) kfree(request); } - unlock: return err; } -- cgit From 504776be46cb61bc0606e0e943fd6a04c38f2130 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 Jul 2020 19:35:51 +0200 Subject: nl80211: Simplify error handling path in 'nl80211_trigger_scan()' Re-write the end of 'nl80211_trigger_scan()' with a more standard, easy to understand and future proof version. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20200712173551.274448-1-christophe.jaillet@wanadoo.fr Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 96b94a900625..6fdf818f66cf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7993,15 +7993,18 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) rdev->scan_req = request; err = rdev_scan(rdev, request); - if (!err) { - nl80211_send_scan_start(rdev, wdev); - if (wdev->netdev) - dev_hold(wdev->netdev); - } else { + if (err) + goto out_free; + + nl80211_send_scan_start(rdev, wdev); + if (wdev->netdev) + dev_hold(wdev->netdev); + + return 0; + out_free: - rdev->scan_req = NULL; - kfree(request); - } + rdev->scan_req = NULL; + kfree(request); return err; } -- cgit From fc0561dc6a9c61613ea703f54f9cf1c6bb2e0b68 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 Jul 2020 15:45:48 -0500 Subject: mac80211: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200707204548.GA9320@embeddedor Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 6 +++--- net/mac80211/chan.c | 2 +- net/mac80211/ht.c | 4 ++-- net/mac80211/ibss.c | 4 ++-- net/mac80211/iface.c | 10 +++++----- net/mac80211/key.c | 2 +- net/mac80211/mesh.c | 4 ++-- net/mac80211/mesh_hwmp.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/mac80211/mlme.c | 4 ++-- net/mac80211/offchannel.c | 4 ++-- net/mac80211/rx.c | 4 ++-- net/mac80211/tdls.c | 8 ++++---- net/mac80211/tx.c | 12 ++++++------ net/mac80211/util.c | 15 +++++++-------- net/mac80211/wme.c | 2 +- 16 files changed, 42 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9b360544ad6f..b4a74064675e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -608,12 +608,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_BIP_CMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_cmac)); - /* fall through */ + fallthrough; case WLAN_CIPHER_SUITE_BIP_GMAC_128: case WLAN_CIPHER_SUITE_BIP_GMAC_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != offsetof(typeof(kseq), aes_gmac)); - /* fall through */ + fallthrough; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: BUILD_BUG_ON(offsetof(typeof(kseq), ccmp) != @@ -2336,7 +2336,7 @@ static int ieee80211_scan(struct wiphy *wiphy, * for now fall through to allow scanning only when * beaconing hasn't been configured yet */ - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: /* * If the scan has been forced (and the driver supports diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 08cf9da9c1e3..bdc0f29dc6cd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -748,7 +748,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, default: WARN_ONCE(1, "Invalid SMPS mode %d\n", sdata->smps_mode); - /* fall through */ + fallthrough; case IEEE80211_SMPS_OFF: needed_static = sdata->needed_rx_chains; needed_dynamic = sdata->needed_rx_chains; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e32906202575..3d62a80b5790 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -250,7 +250,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.bss_conf.chandef.width) { default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: bw = IEEE80211_STA_RX_BW_20; @@ -517,7 +517,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); - /* fall through */ + fallthrough; case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 81d26fef41e9..53632c2f5217 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -791,7 +791,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20_NOHT: sta_flags |= IEEE80211_STA_DISABLE_HT; - /* fall through */ + fallthrough; case NL80211_CHAN_WIDTH_20: sta_flags |= IEEE80211_STA_DISABLE_40MHZ; break; @@ -1401,7 +1401,7 @@ ieee80211_ibss_setup_scan_channels(struct wiphy *wiphy, break; case NL80211_CHAN_WIDTH_80P80: cf2 = chandef->center_freq2; - /* fall through */ + fallthrough; case NL80211_CHAN_WIDTH_80: width = 80; break; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f900c84fb40f..570f818384e8 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -978,7 +978,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); - /* fall through */ + fallthrough; default: cancel_work_sync(&sdata->work); /* @@ -1048,7 +1048,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; - /* fall through */ + fallthrough; default: if (going_down) drv_remove_interface(local, sdata); @@ -1497,7 +1497,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, type = NL80211_IFTYPE_AP; sdata->vif.type = type; sdata->vif.p2p = true; - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps.bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); @@ -1507,7 +1507,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, type = NL80211_IFTYPE_STATION; sdata->vif.type = type; sdata->vif.p2p = true; - /* fall through */ + fallthrough; case NL80211_IFTYPE_STATION: sdata->vif.bss_conf.bssid = sdata->u.mgd.bssid; ieee80211_sta_setup_sdata(sdata); @@ -1703,7 +1703,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, goto out_unlock; } } - /* fall through */ + fallthrough; default: /* assign a new address if possible -- try n_addresses first */ for (i = 0; i < local->hw.wiphy->n_addresses; i++) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8f403c1bb908..9c2888004878 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -225,7 +225,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) */ if (sdata->hw_80211_encap) return -EINVAL; - /* Fall through */ + fallthrough; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5f1ca25b6c97..96f0323c0a3d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1094,10 +1094,10 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.bss_conf.chandef.width) { case NL80211_CHAN_WIDTH_20_NOHT: sta_flags |= IEEE80211_STA_DISABLE_HT; - /* fall through */ + fallthrough; case NL80211_CHAN_WIDTH_20: sta_flags |= IEEE80211_STA_DISABLE_40MHZ; - /* fall through */ + fallthrough; case NL80211_CHAN_WIDTH_40: sta_flags |= IEEE80211_STA_DISABLE_VHT; break; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 02cde0fd08fe..bae3a3e15b88 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1266,7 +1266,7 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) break; case IEEE80211_PROACTIVE_PREQ_WITH_PREP: flags |= IEEE80211_PREQ_PROACTIVE_PREP_FLAG; - /* fall through */ + fallthrough; case IEEE80211_PROACTIVE_PREQ_NO_PREP: interval = ifmsh->mshcfg.dot11MeshHWMPactivePathToRootTimeout; target_flags |= IEEE80211_PREQ_TO_FLAG | diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 798e4b6b383f..15f2fc658f70 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -699,7 +699,7 @@ void mesh_plink_timer(struct timer_list *t) break; } reason = WLAN_REASON_MESH_MAX_RETRIES; - /* fall through */ + fallthrough; case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b2a9d47cf86d..1c1b3f6bc415 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -533,7 +533,7 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); - /* fall through */ + fallthrough; case IEEE80211_SMPS_OFF: cap |= WLAN_HT_CAP_SM_PS_DISABLED << IEEE80211_HT_CAP_SM_PS_SHIFT; @@ -1529,7 +1529,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, switch (channel->band) { default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case NL80211_BAND_2GHZ: case NL80211_BAND_60GHZ: chan_increment = 1; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index db3b8bf75656..7b842aa903c3 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -808,13 +808,13 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (!sdata->vif.bss_conf.ibss_joined) need_offchan = true; #ifdef CONFIG_MAC80211_MESH - /* fall through */ + fallthrough; case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.mesh_id_len) need_offchan = true; #endif - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5c5af4b5fc08..8ea93735e47c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3736,7 +3736,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) rx->sta->rx_stats.dropped++; - /* fall through */ + fallthrough; case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; struct ieee80211_supported_band *sband; @@ -4752,7 +4752,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, break; default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case RX_ENC_LEGACY: if (WARN_ON(status->rate_idx >= sband->n_bitrates)) goto drop; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 4b0cff4a07bd..e01e4daeb8cd 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -239,7 +239,7 @@ static enum ieee80211_ac_numbers ieee80211_ac_from_wmm(int ac) switch (ac) { default: WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case 0: return IEEE80211_AC_BE; case 1: @@ -952,7 +952,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, set_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); sta->sta.tdls_initiator = false; } - /* fall-through */ + fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_TDLS_DISCOVERY_REQUEST: initiator = true; @@ -967,7 +967,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, clear_sta_flag(sta, WLAN_STA_TDLS_INITIATOR); sta->sta.tdls_initiator = true; } - /* fall-through */ + fallthrough; case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: initiator = false; break; @@ -1222,7 +1222,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, * by the AP. */ drv_mgd_protect_tdls_discover(sdata->local, sdata); - /* fall-through */ + fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: /* no special handling */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ee30ef441f4a..0d0e901b1d4c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1740,7 +1740,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, case NL80211_IFTYPE_AP_VLAN: sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - /* fall through */ + fallthrough; default: vif = &sdata->vif; break; @@ -2383,7 +2383,7 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, } else if (sdata->wdev.use_4addr) { return -ENOLINK; } - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_ADHOC: @@ -2553,7 +2553,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, band = chanctx_conf->def.chan->band; if (sdata->wdev.use_4addr) break; - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: if (sdata->vif.type == NL80211_IFTYPE_AP) chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -3000,7 +3000,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) build.hdr_len = 30; break; } - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ @@ -3711,7 +3711,7 @@ begin: case NL80211_IFTYPE_AP_VLAN: tx.sdata = container_of(tx.sdata->bss, struct ieee80211_sub_if_data, u.ap); - /* fall through */ + fallthrough; default: vif = &tx.sdata->vif; break; @@ -4050,7 +4050,7 @@ static bool ieee80211_multicast_to_unicast(struct sk_buff *skb, return false; if (sdata->wdev.use_4addr) return false; - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: /* check runtime toggle for this bss */ if (!sdata->bss->multicast_to_unicast) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 64a83ecd0a73..b768ebd7117f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2347,10 +2347,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_ADHOC: if (sdata->vif.bss_conf.ibss_joined) WARN_ON(drv_join_ibss(local, sdata)); - /* fall through */ + fallthrough; default: ieee80211_reconfig_stations(sdata); - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: /* AP stations are handled later */ for (i = 0; i < IEEE80211_NUM_ACS; i++) drv_conf_tx(local, sdata, i, @@ -2399,7 +2399,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS; @@ -2414,8 +2414,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (rcu_access_pointer(sdata->u.ap.beacon)) drv_start_ap(local, sdata); } - - /* fall through */ + fallthrough; case NL80211_IFTYPE_MESH_POINT: if (sdata->vif.bss_conf.enable_beacon) { changed |= BSS_CHANGED_BEACON | @@ -2885,7 +2884,7 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); - /* fall through */ + fallthrough; case IEEE80211_SMPS_OFF: cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED, IEEE80211_HE_6GHZ_CAP_SM_PS); @@ -3196,7 +3195,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, break; case 0x01: support_80_80 = false; - /* fall through */ + fallthrough; case 0x02: case 0x03: ccf1 = ccfs2; @@ -3566,7 +3565,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, break; default: WARN_ON(1); - /* fall through */ + fallthrough; case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; int shift = 0; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 72920d82928c..2fb99325135a 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -198,7 +198,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, sta = rcu_dereference(sdata->u.vlan.sta); if (sta) break; - /* fall through */ + fallthrough; case NL80211_IFTYPE_AP: ra = skb->data; break; -- cgit From 2f1805ea209a146669e0b660633ed22f49e1dd49 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 25 Jun 2020 14:15:24 +0300 Subject: cfg80211: allow the low level driver to flush the BSS table The low level driver adds its own opaque information in the BSS table in the cfg80211_bss structure. The low level driver may need to signal that this information is no longer relevant and needs to be recreated. Add an API to allow the low level driver to do that. iwlwifi needs this because it keeps there an information about the firmware's internal clock. This is kept in mac80211's struct ieee80211_bss::sync_device_ts. This information is populated while we scan, we add the internal firmware's clock to each beacon which allows us to program the firmware correctly after association so that it'll know when (in terms of its internal clock) the DTIM and TBTT will happen. When the firmware is reset this internal clock is reset as well and ieee80211_bss::sync_device_ts is no longer accurate. iwlwifi will call this new API any time the firmware is started. Signed-off-by: Emmanuel Grumbach Link: https://lore.kernel.org/r/20200625111524.3992-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 74ea4cfb39fb..e67a74488bbe 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -712,6 +712,16 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } +void cfg80211_bss_flush(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, jiffies); + spin_unlock_bh(&rdev->bss_lock); +} +EXPORT_SYMBOL(cfg80211_bss_flush); + const struct element * cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len, const u8 *match, unsigned int match_len, -- cgit From e3718a611470d311a92c60d4eb535270b49a7108 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Wed, 17 Jun 2020 09:30:33 +0200 Subject: cfg80211/mac80211: add mesh_param "mesh_nolearn" to skip path discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, before being able to forward a packet between two 802.11s nodes, both a PLINK handshake is performed upon receiving a beacon and then later a PREQ/PREP exchange for path discovery is performed on demand upon receiving a data frame to forward. When running a mesh protocol on top of an 802.11s interface, like batman-adv, we do not need the multi-hop mesh routing capabilities of 802.11s and usually set mesh_fwding=0. However, even with mesh_fwding=0 the PREQ/PREP path discovery is still performed on demand. Even though in this scenario the next hop PREQ/PREP will determine is always the direct 11s neighbor node. The new mesh_nolearn parameter allows to skip the PREQ/PREP exchange in this scenario, leading to a reduced delay, reduced packet buffering and simplifies HWMP in general. mesh_nolearn is still rather conservative in that if the packet destination is not a direct 11s neighbor, it will fall back to PREQ/PREP path discovery. For normal, multi-hop 802.11s mesh routing it is usually not advisable to enable mesh_nolearn as a transmission to a direct but distant neighbor might be worse than reaching that same node via a more robust / higher throughput etc. multi-hop path. Cc: Sven Eckelmann Cc: Simon Wunderlich Signed-off-by: Linus Lüssing Link: https://lore.kernel.org/r/20200617073034.26149-1-linus.luessing@c0d3.blue [fix nl80211 policy to range 0/1 only] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ net/mac80211/debugfs_netdev.c | 2 ++ net/mac80211/mesh_hwmp.c | 39 +++++++++++++++++++++++++++++++++++++++ net/wireless/mesh.c | 1 + net/wireless/nl80211.c | 7 ++++++- net/wireless/trace.h | 4 +++- 6 files changed, 53 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b4a74064675e..9af56b848544 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2126,6 +2126,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask)) conf->dot11MeshConnectedToMeshGate = nconf->dot11MeshConnectedToMeshGate; + if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) + conf->dot11MeshNolearn = nconf->dot11MeshNolearn; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index d7e955127d5c..09eab2c3f380 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -638,6 +638,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC); IEEE80211_IF_FILE(dot11MeshConnectedToMeshGate, u.mesh.mshcfg.dot11MeshConnectedToMeshGate, DEC); +IEEE80211_IF_FILE(dot11MeshNolearn, u.mesh.mshcfg.dot11MeshNolearn, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -762,6 +763,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(power_mode); MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); MESHPARAMS_ADD(dot11MeshConnectedToMeshGate); + MESHPARAMS_ADD(dot11MeshNolearn); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index bae3a3e15b88..bec23d2eee7a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1172,6 +1172,40 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, return -ENOENT; } +/** + * mesh_nexthop_lookup_nolearn - try to set next hop without path discovery + * @skb: 802.11 frame to be sent + * @sdata: network subif the frame will be sent through + * + * Check if the meshDA (addr3) of a unicast frame is a direct neighbor. + * And if so, set the RA (addr1) to it to transmit to this node directly, + * avoiding PREQ/PREP path discovery. + * + * Returns: 0 if the next hop was found and -ENOENT otherwise. + */ +static int mesh_nexthop_lookup_nolearn(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct sta_info *sta; + + if (is_multicast_ether_addr(hdr->addr1)) + return -ENOENT; + + rcu_read_lock(); + sta = sta_info_get(sdata, hdr->addr3); + + if (!sta || sta->mesh->plink_state != NL80211_PLINK_ESTAB) { + rcu_read_unlock(); + return -ENOENT; + } + rcu_read_unlock(); + + memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + return 0; +} + /** * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame. Calling * this function is considered "using" the associated mpath, so preempt a path @@ -1185,11 +1219,16 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, int mesh_nexthop_lookup(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; struct sta_info *next_hop; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u8 *target_addr = hdr->addr3; + if (ifmsh->mshcfg.dot11MeshNolearn && + !mesh_nexthop_lookup_nolearn(sdata, skb)) + return 0; + mpath = mesh_path_lookup(sdata, target_addr); if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE)) return -ENOENT; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index eac5aa1419fc..e4e363138279 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -78,6 +78,7 @@ const struct mesh_config default_mesh_config = { .power_mode = NL80211_MESH_POWER_ACTIVE, .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT, + .dot11MeshNolearn = false, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6fdf818f66cf..257c06315464 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6885,7 +6885,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, cur_params.plink_timeout) || nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE, - cur_params.dot11MeshConnectedToMeshGate)) + cur_params.dot11MeshConnectedToMeshGate) || + nla_put_u8(msg, NL80211_MESHCONF_NOLEARN, + cur_params.dot11MeshNolearn)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -6943,6 +6945,7 @@ nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1), + [NL80211_MESHCONF_NOLEARN] = NLA_POLICY_RANGE(NLA_U8, 0, 1), }; static const struct nla_policy @@ -7094,6 +7097,8 @@ do { \ NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask, NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNolearn, mask, + NL80211_MESHCONF_NOLEARN, nla_get_u8); if (mask_out) *mask_out = mask; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b23cab016521..6e218a0acd4e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -68,7 +68,8 @@ __field(u16, ht_opmode) \ __field(u32, dot11MeshHWMPactivePathToRootTimeout) \ __field(u16, dot11MeshHWMProotInterval) \ - __field(u16, dot11MeshHWMPconfirmationInterval) + __field(u16, dot11MeshHWMPconfirmationInterval) \ + __field(bool, dot11MeshNolearn) #define MESH_CFG_ASSIGN \ do { \ __entry->dot11MeshRetryTimeout = conf->dot11MeshRetryTimeout; \ @@ -109,6 +110,7 @@ conf->dot11MeshHWMProotInterval; \ __entry->dot11MeshHWMPconfirmationInterval = \ conf->dot11MeshHWMPconfirmationInterval; \ + __entry->dot11MeshNolearn = conf->dot11MeshNolearn; \ } while (0) #define CHAN_ENTRY __field(enum nl80211_band, band) \ -- cgit From 184eebe664f0e11c485f6d309fe56297b3f75e9e Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:37 +0200 Subject: cfg80211/mac80211: add connected to auth server to meshconf Besides information about num of peerings and gate connectivity, the mesh formation byte also contains a flag for authentication server connectivity, that currently cannot be set in the mesh conf. This patch adds this capability, which is necessary to implement 802.1X authentication in mesh mode. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +++ net/mac80211/debugfs_netdev.c | 3 +++ net/mac80211/mesh.c | 5 ++++- net/wireless/nl80211.c | 8 +++++++- 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9af56b848544..6a6531a50e54 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2128,6 +2128,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, nconf->dot11MeshConnectedToMeshGate; if (_chg_mesh_attr(NL80211_MESHCONF_NOLEARN, mask)) conf->dot11MeshNolearn = nconf->dot11MeshNolearn; + if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_AS, mask)) + conf->dot11MeshConnectedToAuthServer = + nconf->dot11MeshConnectedToAuthServer; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 09eab2c3f380..fe8a7a87e513 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -639,6 +639,8 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration, IEEE80211_IF_FILE(dot11MeshConnectedToMeshGate, u.mesh.mshcfg.dot11MeshConnectedToMeshGate, DEC); IEEE80211_IF_FILE(dot11MeshNolearn, u.mesh.mshcfg.dot11MeshNolearn, DEC); +IEEE80211_IF_FILE(dot11MeshConnectedToAuthServer, + u.mesh.mshcfg.dot11MeshConnectedToAuthServer, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -764,6 +766,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshAwakeWindowDuration); MESHPARAMS_ADD(dot11MeshConnectedToMeshGate); MESHPARAMS_ADD(dot11MeshNolearn); + MESHPARAMS_ADD(dot11MeshConnectedToAuthServer); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 96f0323c0a3d..d0db6af16427 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -260,6 +260,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, bool is_connected_to_gate = ifmsh->num_gates > 0 || ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol || ifmsh->mshcfg.dot11MeshConnectedToMeshGate; + bool is_connected_to_as = ifmsh->mshcfg.dot11MeshConnectedToAuthServer; if (skb_tailroom(skb) < 2 + meshconf_len) return -ENOMEM; @@ -284,7 +285,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, /* Mesh Formation Info - number of neighbors */ neighbors = atomic_read(&ifmsh->estab_plinks); neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); - *pos++ = (neighbors << 1) | is_connected_to_gate; + *pos++ = (is_connected_to_as << 7) | + (neighbors << 1) | + is_connected_to_gate; /* Mesh capability */ *pos = 0x00; *pos |= ifmsh->mshcfg.dot11MeshForwarding ? diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 257c06315464..434fd06dc5cf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6887,7 +6887,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE, cur_params.dot11MeshConnectedToMeshGate) || nla_put_u8(msg, NL80211_MESHCONF_NOLEARN, - cur_params.dot11MeshNolearn)) + cur_params.dot11MeshNolearn) || + nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_AS, + cur_params.dot11MeshConnectedToAuthServer)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -6946,6 +6948,7 @@ nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1), [NL80211_MESHCONF_NOLEARN] = NLA_POLICY_RANGE(NLA_U8, 0, 1), + [NL80211_MESHCONF_CONNECTED_TO_AS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), }; static const struct nla_policy @@ -7058,6 +7061,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToMeshGate, mask, NL80211_MESHCONF_CONNECTED_TO_GATE, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToAuthServer, mask, + NL80211_MESHCONF_CONNECTED_TO_AS, + nla_get_u8); /* * Check HT operation mode based on * IEEE 802.11-2016 9.4.2.57 HT Operation element. -- cgit From 1303a51c24100b3b1915d6f9072fe5ae5bb4c5f6 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Thu, 11 Jun 2020 16:02:38 +0200 Subject: cfg80211/mac80211: add connected to auth server to station info This patch adds the necessary bits to later query the auth server flag for every peer from iw. Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200611140238.427461-2-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 4 +++- net/mac80211/sta_info.h | 2 ++ net/wireless/nl80211.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cd8487bc6fc2..a39773b40457 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2426,7 +2426,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | BIT_ULL(NL80211_STA_INFO_PEER_PM) | BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | - BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE); + BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | + BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; @@ -2439,6 +2440,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->peer_pm = sta->mesh->peer_pm; sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; sinfo->connected_to_gate = sta->mesh->connected_to_gate; + sinfo->connected_to_as = sta->mesh->connected_to_as; #endif } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 49728047dfad..9d398c9daa4c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -385,6 +385,7 @@ DECLARE_EWMA(mesh_tx_rate_avg, 8, 16) * @processed_beacon: set to true after peer rates and capabilities are * processed * @connected_to_gate: true if mesh STA has a path to a mesh gate + * @connected_to_as: true if mesh STA has a path to a authentication server * @fail_avg: moving percentage of failed MSDUs * @tx_rate_avg: moving average of tx bitrate */ @@ -404,6 +405,7 @@ struct mesh_sta { bool processed_beacon; bool connected_to_gate; + bool connected_to_as; enum nl80211_plink_state plink_state; u32 plink_timeout; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 434fd06dc5cf..13a38aab1565 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5395,6 +5395,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(PEER_PM, peer_pm, u32); PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8); + PUT_SINFO(CONNECTED_TO_AS, connected_to_as, u8); if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start_noflag(msg, -- cgit From 3ff901cb5df1d2102e924d75d91347a2a3070fa5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 24 Jul 2020 20:28:16 +0200 Subject: mac80211: improve AQL tx airtime estimation AQL does not take into account that most HT/VHT/HE traffic is A-MPDU aggregated. Because of that, the per-packet airtime overhead is vastly overestimated. Improve it by assuming an average aggregation length of 16 for non-legacy traffic if not using the VO AC queue. This should improve performance with high data rates, especially with multiple stations Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200724182816.18678-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 26 +++++++++++++++++++++----- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/tx.c | 3 ++- 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 9fc2968856c0..366f76c9003d 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -551,7 +551,7 @@ EXPORT_SYMBOL_GPL(ieee80211_calc_tx_airtime); u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *pubsta, - int len) + int len, bool ampdu) { struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *conf; @@ -572,10 +572,26 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, if (pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - - return ieee80211_calc_tx_airtime_rate(hw, - &sta->tx_stats.last_rate, - band, len); + struct ieee80211_tx_rate *rate = &sta->tx_stats.last_rate; + u32 airtime; + + if (!(rate->flags & (IEEE80211_TX_RC_VHT_MCS | + IEEE80211_TX_RC_MCS))) + ampdu = false; + + /* + * Assume that HT/VHT transmission on any AC except VO will + * use aggregation. Since we don't have reliable reporting + * of aggregation length, assume an average of 16. + * This will not be very accurate, but much better than simply + * assuming un-aggregated tx. + */ + airtime = ieee80211_calc_tx_airtime_rate(hw, rate, band, + ampdu ? len * 16 : len); + if (ampdu) + airtime /= 16; + + return airtime; } if (!conf) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ec1a71ac65f2..28b154c6e72d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2290,7 +2290,7 @@ extern const struct ethtool_ops ieee80211_ethtool_ops; u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *pubsta, - int len); + int len, bool ampdu); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d0e901b1d4c..7c0abe477b03 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3722,10 +3722,11 @@ encap_out: if (vif && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { + bool ampdu = txq->ac != IEEE80211_AC_VO; u32 airtime; airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta, - skb->len); + skb->len, ampdu); if (airtime) { airtime = ieee80211_info_set_tx_time_est(info, airtime); ieee80211_sta_update_pending_airtime(local, tx.sta, -- cgit From 180ac48ee62f53c26787350a956c5ac371cbe0b7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 26 Jul 2020 15:09:47 +0200 Subject: mac80211: calculate skb hash early when using itxq This avoids flow separation issues when using software encryption. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200726130947.88145-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7c0abe477b03..fd44a71eea58 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3952,6 +3952,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, if (local->ops->wake_tx_queue) { u16 queue = __ieee80211_select_queue(sdata, sta, skb); skb_set_queue_mapping(skb, queue); + skb_get_hash(skb); } if (sta) { -- cgit From 322cd27c06450b2db2cb6bdc68f3814149baf767 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Thu, 9 Jul 2020 08:16:21 +0530 Subject: cfg80211/mac80211: avoid bss color setting in non-HE modes Adding bss-color configuration is only valid in HE mode. Earlier we have enabled it by default, irrespective of capabilities/mode. Fix that. Reported-by: kernel test robot Reported-by: Rajkumar Manoharan Signed-off-by: P Praneesh Link: https://lore.kernel.org/r/1594262781-21444-1-git-send-email-ppranees@codeaurora.org [fix up commit message] Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 8 +++++--- net/mac80211/mlme.c | 4 +++- net/wireless/nl80211.c | 3 +++ 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6a6531a50e54..bbd86cd79954 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -991,9 +991,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | - BSS_CHANGED_TWT | - BSS_CHANGED_HE_OBSS_PD | - BSS_CHANGED_HE_BSS_COLOR; + BSS_CHANGED_TWT; int i, err; int prev_beacon_int; @@ -1019,6 +1017,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.frame_time_rts_th = le32_get_bits(params->he_oper->he_oper_params, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); + changed |= BSS_CHANGED_HE_OBSS_PD; + + if (!params->he_bss_color.disabled) + changed |= BSS_CHANGED_HE_BSS_COLOR; } mutex_lock(&local->mtx); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1c1b3f6bc415..8a92a62dc54d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3463,7 +3463,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_conf->he_bss_color.disabled = le32_get_bits(elems->he_operation->he_oper_params, IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); - changed |= BSS_CHANGED_HE_BSS_COLOR; + + if (!bss_conf->he_bss_color.disabled) + changed |= BSS_CHANGED_HE_BSS_COLOR; bss_conf->htc_trig_based_pkt_ext = le32_get_bits(elems->he_operation->he_oper_params, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 13a38aab1565..b4048f3c5134 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4865,6 +4865,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); + /* disable BSS color by default */ + params.he_bss_color.disabled = true; + /* these are required for START_AP */ if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || !info->attrs[NL80211_ATTR_DTIM_PERIOD] || -- cgit From 1df2bdba528b5a7a30f1b107b6924aa79af5e00e Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:48 +0400 Subject: mac80211: never drop injected frames even if normally not allowed In ieee80211_tx_dequeue there is a check to see if the dequeued frame is allowed in the current state. Injected frames that are normally not allowed are being be dropped here. Fix this by checking if a frame was injected and if so always allowing it. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-1-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fd44a71eea58..007e070227fd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3619,7 +3619,7 @@ begin: tx.skb = skb; tx.sdata = vif_to_sdata(info->control.vif); - if (txq->sta) { + if (txq->sta && !(info->flags & IEEE80211_TX_CTL_INJECTED)) { tx.sta = container_of(txq->sta, struct sta_info, sta); /* * Drop unicast frames to unauthorised stations unless they are -- cgit From e02281e7a5c524aaf6a52bb01afc4cc49addb908 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:49 +0400 Subject: mac80211: add radiotap flag to prevent sequence number overwrite The radiotap specification contains a flag to indicate that the sequence number of an injected frame should not be overwritten. Parse this flag and define and set a corresponding Tx control flag. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-2-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 007e070227fd..413345056445 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2085,6 +2085,8 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, txflags = get_unaligned_le16(iterator.this_arg); if (txflags & IEEE80211_RADIOTAP_F_TX_NOACK) info->flags |= IEEE80211_TX_CTL_NO_ACK; + if (txflags & IEEE80211_RADIOTAP_F_TX_NOSEQNO) + info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO; break; case IEEE80211_RADIOTAP_RATE: -- cgit From 29c3e95f79adcef9d7999ec643639033585dba08 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:50 +0400 Subject: mac80211: do not overwrite the sequence number if requested Check if the Tx control flag is set to prevent sequence number overwrites, and if so, do not assign a new sequence number to the transmitted frame. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-3-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 413345056445..e1e915e7cc8e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -825,6 +825,9 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) if (ieee80211_is_qos_nullfunc(hdr->frame_control)) return TX_CONTINUE; + if (info->control.flags & IEEE80211_TX_CTRL_NO_SEQNO) + return TX_CONTINUE; + /* * Anything but QoS data that has a sequence number field * (is long enough) gets a sequence number from the global -- cgit From 2b3dab1353209256cb3d7e07f14584eac8c82508 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:51 +0400 Subject: mac80211: use same flag everywhere to avoid sequence number overwrite Use the IEEE80211_TX_CTRL_NO_SEQNO flag in ieee80211_tx_info to mark probe requests whose sequence number must not be overwritten. This provides consistency with the radiotap flag that can be set to indicate that the sequence number of an injected frame should not be overwritten. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-4-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/scan.c | 7 +++---- net/mac80211/tx.c | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 28b154c6e72d..6f77018db23f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -164,7 +164,6 @@ typedef unsigned __bitwise ieee80211_tx_result; #define TX_DROP ((__force ieee80211_tx_result) 1u) #define TX_QUEUED ((__force ieee80211_tx_result) 2u) -#define IEEE80211_TX_NO_SEQNO BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8003be6dae8a..032f55d38f7d 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -591,7 +591,6 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel) { struct sk_buff *skb; - u32 txdata_flags = 0; skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel, ssid, ssid_len, @@ -600,15 +599,15 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, if (skb) { if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) { struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 sn = get_random_u32(); - txdata_flags |= IEEE80211_TX_NO_SEQNO; + info->control.flags |= IEEE80211_TX_CTRL_NO_SEQNO; hdr->seq_ctrl = cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; - ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band, - txdata_flags); + ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band, 0); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e1e915e7cc8e..b85876d69433 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -836,8 +836,6 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) */ if (!ieee80211_is_data_qos(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) { - if (tx->flags & IEEE80211_TX_NO_SEQNO) - return TX_CONTINUE; /* driver should assign sequence number */ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; /* for pure STA mode without beacons, we can do it */ -- cgit From 08aca29aa8b18dec2e84bc97d27dabe133b75822 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:52 +0400 Subject: mac80211: remove unused flags argument in transmit functions The flags argument in transmit functions is no longer being used and can be removed. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-5-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 11 +++++------ net/mac80211/offchannel.c | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/scan.c | 2 +- net/mac80211/sta_info.c | 2 +- net/mac80211/tx.c | 19 ++++++++----------- 7 files changed, 18 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bbd86cd79954..23ef0ebaf180 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3590,7 +3590,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, } local_bh_disable(); - ieee80211_xmit(sdata, sta, skb, 0); + ieee80211_xmit(sdata, sta, skb); local_bh_enable(); ret = 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6f77018db23f..1760ce77d89f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1966,12 +1966,11 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb, - u32 txdata_flags); + struct sta_info *sta, struct sk_buff *skb); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band, u32 txdata_flags); + enum nl80211_band band); /* sta_out needs to be checked for ERR_PTR() before using */ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, @@ -1981,10 +1980,10 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, static inline void ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band, u32 txdata_flags) + enum nl80211_band band) { rcu_read_lock(); - __ieee80211_tx_skb_tid_band(sdata, skb, tid, band, txdata_flags); + __ieee80211_tx_skb_tid_band(sdata, skb, tid, band); rcu_read_unlock(); } @@ -2002,7 +2001,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, } __ieee80211_tx_skb_tid_band(sdata, skb, tid, - chanctx_conf->def.chan->band, 0); + chanctx_conf->def.chan->band); rcu_read_unlock(); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 7b842aa903c3..f470d1a7ce9b 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -264,7 +264,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, if (roc->mgmt_tx_cookie) { if (!WARN_ON(!roc->frame)) { ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7, - roc->chan->band, 0); + roc->chan->band); roc->frame = NULL; } } else { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8ea93735e47c..9ac3eaa00ce6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3591,7 +3591,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) } __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7, - status->band, 0); + status->band); } dev_kfree_skb(rx->skb); return RX_QUEUED; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 032f55d38f7d..5ac2785cdc7b 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -607,7 +607,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; - ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band, 0); + ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a39773b40457..bf9abfa25bc3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1455,7 +1455,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid, } info->band = chanctx_conf->def.chan->band; - ieee80211_xmit(sdata, sta, skb, 0); + ieee80211_xmit(sdata, sta, skb); rcu_read_unlock(); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b85876d69433..7fb4afaa9410 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1892,7 +1892,7 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb); */ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb, - bool txpending, u32 txdata_flags) + bool txpending) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; @@ -1910,8 +1910,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, led_len = skb->len; res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb); - tx.flags |= txdata_flags; - if (unlikely(res_prepare == TX_DROP)) { ieee80211_free_txskb(&local->hw, skb); return true; @@ -1979,8 +1977,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, } void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, struct sk_buff *skb, - u32 txdata_flags) + struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -2015,7 +2012,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, } ieee80211_set_qos_hdr(sdata, skb); - ieee80211_tx(sdata, sta, skb, false, txdata_flags); + ieee80211_tx(sdata, sta, skb, false); } static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, @@ -2350,7 +2347,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, if (!ieee80211_parse_tx_radiotap(local, skb)) goto fail_rcu; - ieee80211_xmit(sdata, NULL, skb, 0); + ieee80211_xmit(sdata, NULL, skb); rcu_read_unlock(); return NETDEV_TX_OK; @@ -4014,7 +4011,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_tx_stats(dev, skb->len); - ieee80211_xmit(sdata, sta, skb, 0); + ieee80211_xmit(sdata, sta, skb); } goto out; out_free: @@ -4380,7 +4377,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, return true; } info->band = chanctx_conf->def.chan->band; - result = ieee80211_tx(sdata, NULL, skb, true, 0); + result = ieee80211_tx(sdata, NULL, skb, true); } else if (info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) { if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) { dev_kfree_skb(skb); @@ -5339,7 +5336,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid); void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int tid, - enum nl80211_band band, u32 txdata_flags) + enum nl80211_band band) { int ac = ieee80211_ac_from_tid(tid); @@ -5356,7 +5353,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, */ local_bh_disable(); IEEE80211_SKB_CB(skb)->band = band; - ieee80211_xmit(sdata, NULL, skb, txdata_flags); + ieee80211_xmit(sdata, NULL, skb); local_bh_enable(); } -- cgit From cb17ed29a7a5fea8c9bf70e8a05757d71650e025 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Thu, 23 Jul 2020 14:01:53 +0400 Subject: mac80211: parse radiotap header when selecting Tx queue Already parse the radiotap header in ieee80211_monitor_select_queue. In a subsequent commit this will allow us to add a radiotap flag that influences the queue on which injected packets will be sent. This also fixes the incomplete validation of the injected frame in ieee80211_monitor_select_queue: currently an out of bounds memory access may occur in in the called function ieee80211_select_queue_80211 if the 802.11 header is too small. Note that in ieee80211_monitor_start_xmit the radiotap header is parsed again, which is necessairy because ieee80211_monitor_select_queue is not always called beforehand. Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20200723100153.31631-6-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 15 +++++++++++---- net/mac80211/tx.c | 54 +++++++++++++++++++++++----------------------------- 2 files changed, 35 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 570f818384e8..9740ae8fa697 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1183,17 +1183,24 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; - struct ieee80211_radiotap_header *rtap = (void *)skb->data; + int len_rthdr; if (local->hw.queues < IEEE80211_NUM_ACS) return 0; - if (skb->len < 4 || - skb->len < le16_to_cpu(rtap->it_len) + 2 /* frame control */) + /* reset flags and info before parsing radiotap header */ + memset(info, 0, sizeof(*info)); + + if (!ieee80211_parse_tx_radiotap(skb, dev)) return 0; /* doesn't matter, frame will be dropped */ - hdr = (void *)((u8 *)skb->data + le16_to_cpu(rtap->it_len)); + len_rthdr = ieee80211_get_radiotap_len(skb->data); + hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); + if (skb->len < len_rthdr + 2 || + skb->len < len_rthdr + ieee80211_hdrlen(hdr->frame_control)) + return 0; /* doesn't matter, frame will be dropped */ return ieee80211_select_queue_80211(sdata, skb, hdr); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7fb4afaa9410..ec35a92df8ed 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2015,9 +2015,10 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, ieee80211_tx(sdata, sta, skb, false); } -static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, - struct sk_buff *skb) +bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, + struct net_device *dev) { + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_radiotap_iterator iterator; struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; @@ -2036,6 +2037,18 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, u8 vht_mcs = 0, vht_nss = 0; int i; + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + return false; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? */ + if (unlikely(rthdr->it_version)) + return false; /* only version 0 is supported */ + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) + return false; /* skb too short for claimed rt header extent */ + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_DONTFRAG; @@ -2189,13 +2202,6 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local, local->hw.max_rate_tries); } - /* - * remove the radiotap header - * iterator->_max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator._max_length); - return true; } @@ -2204,8 +2210,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_chanctx_conf *chanctx_conf; - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *tmp_sdata, *sdata; @@ -2213,21 +2217,17 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, u16 len_rthdr; int hdrlen; - /* check for not even having the fixed radiotap header part */ - if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) - goto fail; /* too short to be possibly valid */ + memset(info, 0, sizeof(*info)); + info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_CTL_INJECTED; - /* is it a header version we can trust to find length from? */ - if (unlikely(prthdr->it_version)) - goto fail; /* only version 0 is supported */ + /* Sanity-check and process the injection radiotap header */ + if (!ieee80211_parse_tx_radiotap(skb, dev)) + goto fail; - /* then there must be a radiotap header with a length we can use */ + /* we now know there is a radiotap header with a length we can use */ len_rthdr = ieee80211_get_radiotap_len(skb->data); - /* does the skb contain enough to deliver on the alleged length? */ - if (unlikely(skb->len < len_rthdr)) - goto fail; /* skb too short for claimed rt header extent */ - /* * fix up the pointers accounting for the radiotap * header still being in there. We are being given @@ -2273,11 +2273,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK; } - memset(info, 0, sizeof(*info)); - - info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_CTL_INJECTED; - rcu_read_lock(); /* @@ -2343,9 +2338,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, info->band = chandef->chan->band; - /* process and remove the injection radiotap header */ - if (!ieee80211_parse_tx_radiotap(local, skb)) - goto fail_rcu; + /* remove the injection radiotap header */ + skb_pull(skb, len_rthdr); ieee80211_xmit(sdata, NULL, skb); rcu_read_unlock(); -- cgit From c5d1686b314ea44c5f210990dee05caf98cb068f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 26 Jul 2020 13:06:11 +0200 Subject: mac80211: add a function for running rx without passing skbs to the stack This can be used to run mac80211 rx processing on a batch of frames in NAPI poll before passing them to the network stack in a large batch. This can improve icache footprint, or it can be used to pass frames via netif_receive_skb_list. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200726110611.46886-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 60 +++++++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1760ce77d89f..0b1eaec6649f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -217,7 +217,7 @@ enum ieee80211_rx_flags { }; struct ieee80211_rx_data { - struct napi_struct *napi; + struct list_head *list; struct sk_buff *skb; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9ac3eaa00ce6..836cde516a18 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2578,8 +2578,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, memset(skb->cb, 0, sizeof(skb->cb)); /* deliver to local stack */ - if (rx->napi) - napi_gro_receive(rx->napi, skb); + if (rx->list) + list_add_tail(&skb->list, rx->list); else netif_receive_skb(skb); } @@ -3869,7 +3869,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .napi = NULL, /* must be NULL to not have races */ }; struct tid_ampdu_rx *tid_agg_rx; @@ -4479,8 +4478,8 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* deliver to local stack */ skb->protocol = eth_type_trans(skb, fast_rx->dev); memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->napi) - napi_gro_receive(rx->napi, skb); + if (rx->list) + list_add_tail(&skb->list, rx->list); else netif_receive_skb(skb); @@ -4547,7 +4546,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, struct sk_buff *skb, - struct napi_struct *napi) + struct list_head *list) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; @@ -4562,7 +4561,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, memset(&rx, 0, sizeof(rx)); rx.skb = skb; rx.local = local; - rx.napi = napi; + rx.list = list; if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) I802_DEBUG_INC(local->dot11ReceivedFragmentCount); @@ -4670,8 +4669,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, - struct sk_buff *skb, struct napi_struct *napi) +void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, + struct sk_buff *skb, struct list_head *list) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; @@ -4762,13 +4761,6 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, status->rx_flags = 0; - /* - * key references and virtual interfaces are protected using RCU - * and this requires that we are in a read-side RCU section during - * receive processing - */ - rcu_read_lock(); - /* * Frames with failed FCS/PLCP checksum are not returned, * all other frames are returned without radiotap header @@ -4776,23 +4768,47 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * Also, frames with less than 16 bytes are dropped. */ skb = ieee80211_rx_monitor(local, skb, rate); - if (!skb) { - rcu_read_unlock(); + if (!skb) return; - } ieee80211_tpt_led_trig_rx(local, ((struct ieee80211_hdr *)skb->data)->frame_control, skb->len); - __ieee80211_rx_handle_packet(hw, pubsta, skb, napi); - - rcu_read_unlock(); + __ieee80211_rx_handle_packet(hw, pubsta, skb, list); return; drop: kfree_skb(skb); } +EXPORT_SYMBOL(ieee80211_rx_list); + +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, + struct sk_buff *skb, struct napi_struct *napi) +{ + struct sk_buff *tmp; + LIST_HEAD(list); + + + /* + * key references and virtual interfaces are protected using RCU + * and this requires that we are in a read-side RCU section during + * receive processing + */ + rcu_read_lock(); + ieee80211_rx_list(hw, pubsta, skb, &list); + rcu_read_unlock(); + + if (!napi) { + netif_receive_skb_list(&list); + return; + } + + list_for_each_entry_safe(skb, tmp, &list, list) { + skb_list_del_init(skb); + napi_gro_receive(napi, skb); + } +} EXPORT_SYMBOL(ieee80211_rx_napi); /* This is a version of the rx handler that can be called from hard irq -- cgit From 75e6b594bbaeeb3f8287a2e6eb8811384b8c7195 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Jul 2020 13:00:52 +0200 Subject: cfg80211: invert HE BSS color 'disabled' to 'enabled' This is in fact 'disabled' in the spec, but there it's in a place where that actually makes sense. In our internal data structures, it doesn't really make sense, and in fact the previous commit just fixed a bug in that area. Make this safer by inverting the polarity from 'disabled' to 'enabled'. Link: https://lore.kernel.org/r/20200730130051.5d8399545bd9.Ie62fdcd1a6cd9c969315bc124084a494ca6c8df3@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- net/mac80211/mlme.c | 8 ++++---- net/wireless/nl80211.c | 7 ++----- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 23ef0ebaf180..24e9e00decb8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1019,7 +1019,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); changed |= BSS_CHANGED_HE_OBSS_PD; - if (!params->he_bss_color.disabled) + if (params->he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8a92a62dc54d..839d0367446c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3460,11 +3460,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_conf->he_bss_color.partial = le32_get_bits(elems->he_operation->he_oper_params, IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR); - bss_conf->he_bss_color.disabled = - le32_get_bits(elems->he_operation->he_oper_params, - IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); + bss_conf->he_bss_color.enabled = + !le32_get_bits(elems->he_operation->he_oper_params, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); - if (!bss_conf->he_bss_color.disabled) + if (bss_conf->he_bss_color.enabled) changed |= BSS_CHANGED_HE_BSS_COLOR; bss_conf->htc_trig_based_pkt_ext = diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b4048f3c5134..8d78a6fc59a3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4713,8 +4713,8 @@ static int nl80211_parse_he_bss_color(struct nlattr *attrs, he_bss_color->color = nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); - he_bss_color->disabled = - nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); + he_bss_color->enabled = + !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); he_bss_color->partial = nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); @@ -4865,9 +4865,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - /* disable BSS color by default */ - params.he_bss_color.disabled = true; - /* these are required for START_AP */ if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || !info->attrs[NL80211_ATTR_DTIM_PERIOD] || -- cgit From f96622749a67d40ad5efe8a58d5fc95313097aa0 Mon Sep 17 00:00:00 2001 From: Chung-Hsien Hsu Date: Tue, 23 Jun 2020 08:49:35 -0500 Subject: nl80211: support 4-way handshake offloading for WPA/WPA2-PSK in AP mode Let drivers advertise support for AP-mode WPA/WPA2-PSK 4-way handshake offloading with a new NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK flag. Extend use of NL80211_ATTR_PMK attribute indicating it might be passed as part of NL80211_CMD_START_AP command, and contain the PSK (which is the PMK, hence the name). The driver is assumed to handle the 4-way handshake by itself in this case, instead of relying on userspace. Signed-off-by: Chung-Hsien Hsu Signed-off-by: Chi-Hsien Lin Link: https://lore.kernel.org/r/20200623134938.39997-2-chi-hsien.lin@cypress.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8d78a6fc59a3..a096682ec0ad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9442,7 +9442,9 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN) return -EINVAL; if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK)) + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK) && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK)) return -EINVAL; settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]); } -- cgit From c8ad010665c0b85c6a35466b2159e907b8dd85d1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Jul 2020 15:52:13 +0200 Subject: mac80211: warn only once in check_sdata_in_driver() at each caller Ben Greear has repeatedly reported in the past (for a few years probably) that this triggers repeatedly in certain scenarios. Make this a macro so that each callsite can trigger the warning only once - that will still give us an idea of what's going on and what paths can reach it, but avoids being too noisy. Link: https://lore.kernel.org/r/20200730155212.06fd3a95dbfb.I0b16829aabfaf5f642bce401502a29d16e2dd444@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index de69fc9c4f07..41d495d73d3a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -12,12 +12,11 @@ #include "ieee80211_i.h" #include "trace.h" -static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata) -{ - return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), - "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", - sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); -} +#define check_sdata_in_driver(sdata) ({ \ + !WARN_ONCE(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \ + "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \ + sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); \ +}) static inline struct ieee80211_sub_if_data * get_bss_sdata(struct ieee80211_sub_if_data *sdata) -- cgit From 79bf118957a1966344b247d240434d8047d02ac9 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 30 Jul 2020 19:40:11 +0200 Subject: Bluetooth: Increment management interface revision Increment the mgmt revision due to the recently added new commands. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4ec0fee80344..5bbe71002fb9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -40,7 +40,7 @@ #include "msft.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 17 +#define MGMT_REVISION 18 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, -- cgit From 075f77324f90149bac12c8a705dae5786a1d24fb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 31 Jul 2020 17:41:58 +1000 Subject: Bluetooth: Remove CRYPTO_ALG_INTERNAL flag The flag CRYPTO_ALG_INTERNAL is not meant to be used outside of the Crypto API. It isn't needed here anyway. Signed-off-by: Herbert Xu Signed-off-by: Johan Hedberg --- net/bluetooth/selftest.c | 2 +- net/bluetooth/smp.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index 03e3c89c3046..f71c6fa65fb3 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -205,7 +205,7 @@ static int __init test_ecdh(void) calltime = ktime_get(); - tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + tfm = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(tfm)) { BT_ERR("Unable to create ECDH crypto context"); err = PTR_ERR(tfm); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c5c812e8130e..4cfd05efc548 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1391,7 +1391,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) goto zfree_smp; } - smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + smp->tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(smp->tfm_ecdh)) { BT_ERR("Unable to create ECDH crypto context"); goto free_shash; @@ -3286,7 +3286,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) return ERR_CAST(tfm_cmac); } - tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(tfm_ecdh)) { BT_ERR("Unable to create ECDH crypto context"); crypto_free_shash(tfm_cmac); @@ -3851,7 +3851,7 @@ int __init bt_selftest_smp(void) return PTR_ERR(tfm_cmac); } - tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0); if (IS_ERR(tfm_ecdh)) { BT_ERR("Unable to create ECDH crypto context"); crypto_free_shash(tfm_cmac); -- cgit From ffe8923f109b7ea92c0842c89e61300eefa11c94 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Jul 2020 13:34:46 +0200 Subject: netfilter: nft_compat: make sure xtables destructors have run Pablo Neira found that after recent update of xt_IDLETIMER the iptables-nft tests sometimes show an error. He tracked this down to the delayed cleanup used by nf_tables core: del rule (transaction A) add rule (transaction B) Its possible that by time transaction B (both in same netns) runs, the xt target destructor has not been invoked yet. For native nft expressions this is no problem because all expressions that have such side effects make sure these are handled from the commit phase, rather than async cleanup. For nft_compat however this isn't true. Instead of forcing synchronous behaviour for nft_compat, keep track of the number of outstanding destructor calls. When we attempt to create a new expression, flush the cleanup worker to make sure destructors have completed. With lots of help from Pablo Neira. Reported-by: Pablo Neira Ayso Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 ++++++++-- net/netfilter/nft_compat.c | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 88325b264737..79e4db3cadec 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7290,6 +7290,12 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) } } +void nf_tables_trans_destroy_flush_work(void) +{ + flush_work(&trans_destroy_work); +} +EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); + static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { struct nft_rule *rule; @@ -7472,9 +7478,9 @@ static void nf_tables_commit_release(struct net *net) spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); - schedule_work(&trans_destroy_work); + + mutex_unlock(&net->nft.commit_mutex); } static int nf_tables_commit(struct net *net, struct sk_buff *skb) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index aa1a066cb74b..6428856ccbec 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,6 +27,8 @@ struct nft_xt_match_priv { void *info; }; +static refcount_t nft_compat_pending_destroy = REFCOUNT_INIT(1); + static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx, const char *tablename) { @@ -236,6 +238,15 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); + /* xtables matches or targets can have side effects, e.g. + * creation/destruction of /proc files. + * The xt ->destroy functions are run asynchronously from + * work queue. If we have pending invocations we thus + * need to wait for those to finish. + */ + if (refcount_read(&nft_compat_pending_destroy) > 1) + nf_tables_trans_destroy_flush_work(); + ret = xt_check_target(&par, size, proto, inv); if (ret < 0) return ret; @@ -247,6 +258,13 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return 0; } +static void __nft_mt_tg_destroy(struct module *me, const struct nft_expr *expr) +{ + refcount_dec(&nft_compat_pending_destroy); + module_put(me); + kfree(expr->ops); +} + static void nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -262,8 +280,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - module_put(me); - kfree(expr->ops); + __nft_mt_tg_destroy(me, expr); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -494,8 +511,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, if (par.match->destroy != NULL) par.match->destroy(&par); - module_put(me); - kfree(expr->ops); + __nft_mt_tg_destroy(me, expr); } static void @@ -700,6 +716,14 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static struct nft_expr_type nft_match_type; +static void nft_mt_tg_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) +{ + if (phase == NFT_TRANS_COMMIT) + refcount_inc(&nft_compat_pending_destroy); +} + static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -738,6 +762,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, ops->type = &nft_match_type; ops->eval = nft_match_eval; ops->init = nft_match_init; + ops->deactivate = nft_mt_tg_deactivate, ops->destroy = nft_match_destroy; ops->dump = nft_match_dump; ops->validate = nft_match_validate; @@ -828,6 +853,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); ops->init = nft_target_init; ops->destroy = nft_target_destroy; + ops->deactivate = nft_mt_tg_deactivate, ops->dump = nft_target_dump; ops->validate = nft_target_validate; ops->data = target; @@ -891,6 +917,8 @@ static void __exit nft_compat_module_exit(void) nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); + + WARN_ON_ONCE(refcount_read(&nft_compat_pending_destroy) != 1); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); -- cgit From 83a33b248763f081bbccc7f7f61264883a9e3338 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 31 Jul 2020 16:15:03 -0700 Subject: bluetooth: sco: Fix sockptr reference. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/bluetooth/sco.c: In function ‘sco_sock_setsockopt’: net/bluetooth/sco.c:862:3: error: cannot convert to a pointer type 862 | if (get_user(opt, (u32 __user *)optval)) { | ^~ Signed-off-by: David S. Miller --- net/bluetooth/sco.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6e6b03844a2a..dcf7f96ff417 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -859,7 +859,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_PKT_STATUS: - if (get_user(opt, (u32 __user *)optval)) { + if (copy_from_sockptr(&opt, optval, sizeof(u32))) { err = -EFAULT; break; } -- cgit From 9fc95f50eedb5cfe5d53a9c970c47ed680fc4e54 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 30 Jul 2020 19:02:36 +0800 Subject: net: Pass NULL to skb_network_protocol() when we don't care about vlan depth When we don't care about vlan depth, we could pass NULL instead of the address of a unused local variable to skb_network_protocol() as a param. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fe2e387eed29..38a6371d9bc5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3454,10 +3454,9 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, static netdev_features_t harmonize_features(struct sk_buff *skb, netdev_features_t features) { - int tmp; __be16 type; - type = skb_network_protocol(skb, &tmp); + type = skb_network_protocol(skb, NULL); features = net_mpls_features(skb, features, type); if (skb->ip_summed != CHECKSUM_NONE && -- cgit From cc0b065fd550aee840dbca84e1c3ff667099b461 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Jul 2020 18:09:04 +0300 Subject: hsr: Use %pM format specifier for MAC addresses Convert to %pM instead of using custom code. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- net/hsr/hsr_debugfs.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c index 3b6f675bd55a..7e11a6c35bc3 100644 --- a/net/hsr/hsr_debugfs.c +++ b/net/hsr/hsr_debugfs.c @@ -22,12 +22,6 @@ static struct dentry *hsr_debugfs_root_dir; -static void print_mac_address(struct seq_file *sfp, unsigned char *mac) -{ - seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x ", - mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); -} - /* hsr_node_table_show - Formats and prints node_table entries */ static int hsr_node_table_show(struct seq_file *sfp, void *data) @@ -49,8 +43,8 @@ hsr_node_table_show(struct seq_file *sfp, void *data) /* skip self node */ if (hsr_addr_is_self(priv, node->macaddress_A)) continue; - print_mac_address(sfp, &node->macaddress_A[0]); - print_mac_address(sfp, &node->macaddress_B[0]); + seq_printf(sfp, "%pM ", &node->macaddress_A[0]); + seq_printf(sfp, "%pM ", &node->macaddress_B[0]); seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_A]); seq_printf(sfp, "%10lx, ", node->time_in[HSR_PT_SLAVE_B]); seq_printf(sfp, "%14x, ", node->addr_B_port); -- cgit From 77aec5e1c4935e37c8cbb818a69f2f7d3433ede5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 30 Jul 2020 11:03:14 -0500 Subject: net/sched: cls_u32: Use struct_size() helper Make use of the struct_size() helper, in multiple places, instead of an open-coded version in order to avoid any potential type mistakes and protect against potential integer overflows. Also, remove unnecessary object identifier size. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 771b068f8254..7b69ab1993ba 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -852,9 +852,6 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32 htid, flags = 0; size_t sel_size; int err; -#ifdef CONFIG_CLS_U32_PERF - size_t size; -#endif if (!opt) { if (handle) { @@ -1022,15 +1019,15 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, goto erridr; } - n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL); + n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL); if (n == NULL) { err = -ENOBUFS; goto erridr; } #ifdef CONFIG_CLS_U32_PERF - size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); - n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); + n->pf = __alloc_percpu(struct_size(n->pf, kcnts, s->nkeys), + __alignof__(struct tc_u32_pcnt)); if (!n->pf) { err = -ENOBUFS; goto errfree; @@ -1294,8 +1291,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, int cpu; #endif - if (nla_put(skb, TCA_U32_SEL, - sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), + if (nla_put(skb, TCA_U32_SEL, struct_size(&n->sel, keys, n->sel.nkeys), &n->sel)) goto nla_put_failure; @@ -1345,9 +1341,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, goto nla_put_failure; } #ifdef CONFIG_CLS_U32_PERF - gpf = kzalloc(sizeof(struct tc_u32_pcnt) + - n->sel.nkeys * sizeof(u64), - GFP_KERNEL); + gpf = kzalloc(struct_size(gpf, kcnts, n->sel.nkeys), GFP_KERNEL); if (!gpf) goto nla_put_failure; @@ -1361,9 +1355,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, gpf->kcnts[i] += pf->kcnts[i]; } - if (nla_put_64bit(skb, TCA_U32_PCNT, - sizeof(struct tc_u32_pcnt) + - n->sel.nkeys * sizeof(u64), + if (nla_put_64bit(skb, TCA_U32_PCNT, struct_size(gpf, kcnts, n->sel.nkeys), gpf, TCA_U32_PAD)) { kfree(gpf); goto nla_put_failure; -- cgit From f8ace8d915b88bd1bbaac695de94650dbb25c7b4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:50 +0200 Subject: tcp: rename request_sock cookie_ts bit to syncookie Nowadays output function has a 'synack_type' argument that tells us when the syn/ack is emitted via syncookies. The request already tells us when timestamps are supported, so check both to detect special timestamp for tcp option encoding is needed. We could remove cookie_ts altogether, but a followup patch would otherwise need to adjust function signatures to pass 'want_cookie' to mptcp core. This way, the 'existing' bit can be used. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a018bafd7bdf..11a6f128e51c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6519,7 +6519,6 @@ static void tcp_openreq_init(struct request_sock *req, struct inet_request_sock *ireq = inet_rsk(req); req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->snt_synack = 0; @@ -6674,6 +6673,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!req) goto drop; + req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; #if IS_ENABLED(CONFIG_MPTCP) @@ -6739,7 +6739,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d8f16f6a9b02..85ff417bda7f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3393,7 +3393,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES - if (unlikely(req->cookie_ts)) + if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb->skb_mstamp_ns = cookie_init_timestamp(req, now); else #endif -- cgit From 535fb8152f313dd5d30ef84ce55b01ad9cbae3cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:51 +0200 Subject: mptcp: token: move retry to caller Once syncookie support is added, no state will be stored anymore when the syn/ack is generated in syncookie mode. When the ACK comes back, the generated key will be taken from the TCP ACK, the token is re-generated and inserted into the token tree. This means we can't retry with a new key when the token is already taken in the syncookie case. Therefore, move the retry logic to the caller to prepare for syncookie support in mptcp. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 9 ++++++++- net/mptcp/token.c | 12 ++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1c8482bc2ce5..9feb87880d1c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -126,11 +126,18 @@ static void subflow_init_req(struct request_sock *req, } if (mp_opt.mp_capable && listener->request_mptcp) { - int err; + int err, retries = 4; + +again: + do { + get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key)); + } while (subflow_req->local_key == 0); err = mptcp_token_new_request(req); if (err == 0) subflow_req->mp_capable = 1; + else if (retries-- > 0) + goto again; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; } else if (mp_opt.mp_join && listener->request_mptcp) { diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 97cfc45bcc4f..f82410c54653 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -109,14 +109,12 @@ static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn) int mptcp_token_new_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - int retries = TOKEN_MAX_RETRIES; struct token_bucket *bucket; u32 token; -again: - mptcp_crypto_key_gen_sha(&subflow_req->local_key, - &subflow_req->token, - &subflow_req->idsn); + mptcp_crypto_key_sha(subflow_req->local_key, + &subflow_req->token, + &subflow_req->idsn); pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n", req, subflow_req->local_key, subflow_req->token, subflow_req->idsn); @@ -126,9 +124,7 @@ again: spin_lock_bh(&bucket->lock); if (__token_bucket_busy(bucket, token)) { spin_unlock_bh(&bucket->lock); - if (!--retries) - return -EBUSY; - goto again; + return -EBUSY; } hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain); -- cgit From 78d8b7bc4b32e2d32ac19d3b217166224c4342d0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:52 +0200 Subject: mptcp: subflow: split subflow_init_req When syncookie support is added, we will need to add a variant of subflow_init_req() helper. It will do almost same thing except that it will not compute/add a token to the mptcp token tree. To avoid excess copy&paste, this commit splits away part of the code into a new helper, __subflow_init_req, that can then be re-used from the 'no insert' function added in a followup change. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9feb87880d1c..091e305a81c8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -91,17 +91,9 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, return msk; } -static void subflow_init_req(struct request_sock *req, - const struct sock *sk_listener, - struct sk_buff *skb) +static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener) { - struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct mptcp_options_received mp_opt; - - pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - - mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; @@ -113,9 +105,29 @@ static void subflow_init_req(struct request_sock *req, * TCP option space. */ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info)) - return; + return -EINVAL; #endif + return 0; +} + +static void subflow_init_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); + struct mptcp_options_received mp_opt; + int ret; + + pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); + + ret = __subflow_init_req(req, sk_listener); + if (ret) + return; + + mptcp_get_options(skb, &mp_opt); + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); -- cgit From 08b8d080982fec354173d3fd28a3106a719b8950 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:53 +0200 Subject: mptcp: rename and export mptcp_subflow_request_sock_ops syncookie code path needs to create an mptcp request sock. Prepare for this and add mptcp prefix plus needed export of ops struct. Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 091e305a81c8..9b11d2b6ff4d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -284,7 +284,8 @@ do_reset: tcp_done(sk); } -static struct request_sock_ops subflow_request_sock_ops; +struct request_sock_ops mptcp_subflow_request_sock_ops; +EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops); static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops; static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -297,7 +298,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - return tcp_conn_request(&subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv4_ops, sk, skb); drop: @@ -322,7 +323,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; - return tcp_conn_request(&subflow_request_sock_ops, + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); drop: @@ -1311,8 +1312,8 @@ static int subflow_ops_init(struct request_sock_ops *subflow_ops) void __init mptcp_subflow_init(void) { - subflow_request_sock_ops = tcp_request_sock_ops; - if (subflow_ops_init(&subflow_request_sock_ops) != 0) + mptcp_subflow_request_sock_ops = tcp_request_sock_ops; + if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0) panic("MPTCP: failed to init subflow request sock ops\n"); subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops; -- cgit From c83a47e50d8fd3825a4758158e9edd5acdc74185 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:54 +0200 Subject: mptcp: subflow: add mptcp_subflow_init_cookie_req helper Will be used to initialize the mptcp request socket when a MP_CAPABLE request was handled in syncookie mode, i.e. when a TCP ACK containing a MP_CAPABLE option is a valid syncookie value. Normally (non-cookie case), MPTCP will generate a unique 32 bit connection ID and stores it in the MPTCP token storage to be able to retrieve the mptcp socket for subflow joining. In syncookie case, we do not want to store any state, so just generate the unique ID and use it in the reply. This means there is a small window where another connection could generate the same token. When Cookie ACK comes back, we check that the token has not been registered in the mean time. If it was, the connection needs to fall back to TCP. Changes in v2: - use req->syncookie instead of passing 'want_cookie' arg to ->init_req() (Eric Dumazet) Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- net/mptcp/token.c | 26 ++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index beb34b8a5363..d76d3b40d69e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -400,6 +400,7 @@ void mptcp_token_destroy_request(struct request_sock *req); int mptcp_token_new_connect(struct sock *sk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); +bool mptcp_token_exists(u32 token); struct mptcp_sock *mptcp_token_get_sock(u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9b11d2b6ff4d..3d346572d4c9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -140,18 +140,31 @@ static void subflow_init_req(struct request_sock *req, if (mp_opt.mp_capable && listener->request_mptcp) { int err, retries = 4; + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: do { get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key)); } while (subflow_req->local_key == 0); + if (unlikely(req->syncookie)) { + mptcp_crypto_key_sha(subflow_req->local_key, + &subflow_req->token, + &subflow_req->idsn); + if (mptcp_token_exists(subflow_req->token)) { + if (retries-- > 0) + goto again; + } else { + subflow_req->mp_capable = 1; + } + return; + } + err = mptcp_token_new_request(req); if (err == 0) subflow_req->mp_capable = 1; else if (retries-- > 0) goto again; - subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; @@ -165,6 +178,41 @@ again: } } +int mptcp_subflow_init_cookie_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); + struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); + struct mptcp_options_received mp_opt; + int err; + + err = __subflow_init_req(req, sk_listener); + if (err) + return err; + + mptcp_get_options(skb, &mp_opt); + + if (mp_opt.mp_capable && mp_opt.mp_join) + return -EINVAL; + + if (mp_opt.mp_capable && listener->request_mptcp) { + if (mp_opt.sndr_key == 0) + return -EINVAL; + + subflow_req->local_key = mp_opt.rcvr_key; + err = mptcp_token_new_request(req); + if (err) + return err; + + subflow_req->mp_capable = 1; + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); + static void subflow_v4_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) diff --git a/net/mptcp/token.c b/net/mptcp/token.c index f82410c54653..8b47c4bb1c6b 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -204,6 +204,32 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req, spin_unlock_bh(&bucket->lock); } +bool mptcp_token_exists(u32 token) +{ + struct hlist_nulls_node *pos; + struct token_bucket *bucket; + struct mptcp_sock *msk; + struct sock *sk; + + rcu_read_lock(); + bucket = token_bucket(token); + +again: + sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { + msk = mptcp_sk(sk); + if (READ_ONCE(msk->token) == token) + goto found; + } + if (get_nulls_value(pos) != (token & token_mask)) + goto again; + + rcu_read_unlock(); + return false; +found: + rcu_read_unlock(); + return true; +} + /** * mptcp_token_get_sock - retrieve mptcp connection sock using its token * @token: token of the mptcp connection to retrieve -- cgit From 6fc8c827dd4fa615965c4eac9bbfd465f6eb8fb4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:55 +0200 Subject: tcp: syncookies: create mptcp request socket for ACK cookies with MPTCP option If SYN packet contains MP_CAPABLE option, keep it enabled. Syncokie validation and cookie-based socket creation is changed to instantiate an mptcp request sockets if the ACK contains an MPTCP connection request. Rather than extend both cookie_v4/6_check, add a common helper to create the (mp)tcp request socket. Suggested-by: Paolo Abeni Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 38 ++++++++++++++++++++++++++++++++++---- net/ipv4/tcp_input.c | 3 --- net/ipv6/syncookies.c | 5 +---- 3 files changed, 35 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9a4f6b16c9bc..54838ee2e8d4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -276,6 +276,39 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_ecn_ok); +struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk, + struct sk_buff *skb) +{ + struct tcp_request_sock *treq; + struct request_sock *req; + +#ifdef CONFIG_MPTCP + if (sk_is_mptcp(sk)) + ops = &mptcp_subflow_request_sock_ops; +#endif + + req = inet_reqsk_alloc(ops, sk, false); + if (!req) + return NULL; + +#if IS_ENABLED(CONFIG_MPTCP) + treq = tcp_rsk(req); + treq->is_mptcp = sk_is_mptcp(sk); + if (treq->is_mptcp) { + int err = mptcp_subflow_init_cookie_req(req, sk, skb); + + if (err) { + reqsk_free(req); + return NULL; + } + } +#endif + + return req; +} +EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); + /* On input, sk is a listener. * Output is listener if incoming packet would not create a child * NULL if memory could not be allocated. @@ -326,7 +359,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */ + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); if (!req) goto out; @@ -350,9 +383,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->tfo_listener = false; - if (IS_ENABLED(CONFIG_MPTCP)) - treq->is_mptcp = 0; - if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 11a6f128e51c..739da25b0c23 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6701,9 +6701,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_req(req, sk, skb); - if (IS_ENABLED(CONFIG_MPTCP) && want_cookie) - tcp_rsk(req)->is_mptcp = 0; - if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 13235a012388..e796a64be308 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -170,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); if (!req) goto out; @@ -178,9 +178,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; - if (IS_ENABLED(CONFIG_MPTCP)) - treq->is_mptcp = 0; - if (security_inet_conn_request(sk, skb, req)) goto out_free; -- cgit From 9466a1ccebbe54ac57fb8a89c2b4b854826546a8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Jul 2020 21:25:56 +0200 Subject: mptcp: enable JOIN requests even if cookies are in use JOIN requests do not work in syncookie mode -- for HMAC validation, the peers nonce and the mptcp token (to obtain the desired connection socket the join is for) are required, but this information is only present in the initial syn. So either we need to drop all JOIN requests once a listening socket enters syncookie mode, or we need to store enough state to reconstruct the request socket later. This adds a state table (1024 entries) to store the data present in the MP_JOIN syn request and the random nonce used for the cookie syn/ack. When a MP_JOIN ACK passed cookie validation, the table is consulted to rebuild the request socket from it. An alternate approach would be to "cancel" syn-cookie mode and force MP_JOIN to always use a syn queue entry. However, doing so brings the backlog over the configured queue limit. v2: use req->syncookie, not (removed) want_cookie arg Suggested-by: Paolo Abeni Signed-off-by: Florian Westphal Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 6 +++ net/mptcp/Makefile | 1 + net/mptcp/ctrl.c | 1 + net/mptcp/protocol.h | 20 ++++++++ net/mptcp/subflow.c | 14 ++++++ net/mptcp/syncookies.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+) create mode 100644 net/mptcp/syncookies.c (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 54838ee2e8d4..11b20474be83 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -212,6 +212,12 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); + + if (tcp_rsk(req)->drop_req) { + refcount_set(&req->rsk_refcnt, 2); + return child; + } + if (inet_csk_reqsk_queue_add(sk, req, child)) return child; diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 2360cbd27d59..a611968be4d7 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ mib.o pm_netlink.o +obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 8e39585d37f3..54b888f94009 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -112,6 +112,7 @@ static struct pernet_operations mptcp_pernet_ops = { void __init mptcp_init(void) { + mptcp_join_cookie_init(); mptcp_proto_init(); if (register_pernet_subsys(&mptcp_pernet_ops) < 0) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d76d3b40d69e..60b27d44c184 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -506,4 +506,24 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) !subflow->conn_finished; } +#ifdef CONFIG_SYN_COOKIES +void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb); +bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb); +void __init mptcp_join_cookie_init(void); +#else +static inline void +subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) {} +static inline bool +mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + return false; +} + +static inline void mptcp_join_cookie_init(void) {} +#endif + #endif /* __MPTCP_PROTOCOL_H */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 3d346572d4c9..a4cc4591bd4e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -173,6 +173,12 @@ again: subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; subflow_req->msk = subflow_token_join_request(req, skb); + + if (unlikely(req->syncookie) && subflow_req->msk) { + if (mptcp_can_accept_new_subflow(subflow_req->msk)) + subflow_init_req_cookie_join_save(subflow_req, skb); + } + pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, subflow_req->remote_nonce, subflow_req->msk); } @@ -207,6 +213,14 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; + } else if (mp_opt.mp_join && listener->request_mptcp) { + if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) + return -EINVAL; + + if (mptcp_can_accept_new_subflow(subflow_req->msk)) + subflow_req->mp_join = 1; + + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; } return 0; diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c new file mode 100644 index 000000000000..6eb992789b50 --- /dev/null +++ b/net/mptcp/syncookies.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "protocol.h" + +/* Syncookies do not work for JOIN requests. + * + * Unlike MP_CAPABLE, where the ACK cookie contains the needed MPTCP + * options to reconstruct the initial syn state, MP_JOIN does not contain + * the token to obtain the mptcp socket nor the server-generated nonce + * that was used in the cookie SYN/ACK response. + * + * Keep a small best effort state table to store the syn/synack data, + * indexed by skb hash. + * + * A MP_JOIN SYN packet handled by syn cookies is only stored if the 32bit + * token matches a known mptcp connection that can still accept more subflows. + * + * There is no timeout handling -- state is only re-constructed + * when the TCP ACK passed the cookie validation check. + */ + +struct join_entry { + u32 token; + u32 remote_nonce; + u32 local_nonce; + u8 join_id; + u8 local_id; + u8 backup; + u8 valid; +}; + +#define COOKIE_JOIN_SLOTS 1024 + +static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; +static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; + +static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net) +{ + u32 i = skb_get_hash(skb) ^ net_hash_mix(net); + + return i % ARRAY_SIZE(join_entries); +} + +static void mptcp_join_store_state(struct join_entry *entry, + const struct mptcp_subflow_request_sock *subflow_req) +{ + entry->token = subflow_req->token; + entry->remote_nonce = subflow_req->remote_nonce; + entry->local_nonce = subflow_req->local_nonce; + entry->backup = subflow_req->backup; + entry->join_id = subflow_req->remote_id; + entry->local_id = subflow_req->local_id; + entry->valid = 1; +} + +void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); + u32 i = mptcp_join_entry_hash(skb, net); + + /* No use in waiting if other cpu is already using this slot -- + * would overwrite the data that got stored. + */ + spin_lock_bh(&join_entry_locks[i]); + mptcp_join_store_state(&join_entries[i], subflow_req); + spin_unlock_bh(&join_entry_locks[i]); +} + +/* Called for a cookie-ack with MP_JOIN option present. + * Look up the saved state based on skb hash & check token matches msk + * in same netns. + * + * Caller will check msk can still accept another subflow. The hmac + * present in the cookie ACK mptcp option space will be checked later. + */ +bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, + struct sk_buff *skb) +{ + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); + u32 i = mptcp_join_entry_hash(skb, net); + struct mptcp_sock *msk; + struct join_entry *e; + + e = &join_entries[i]; + + spin_lock_bh(&join_entry_locks[i]); + + if (e->valid == 0) { + spin_unlock_bh(&join_entry_locks[i]); + return false; + } + + e->valid = 0; + + msk = mptcp_token_get_sock(e->token); + if (!msk) { + spin_unlock_bh(&join_entry_locks[i]); + return false; + } + + /* If this fails, the token got re-used in the mean time by another + * mptcp socket in a different netns, i.e. entry is outdated. + */ + if (!net_eq(sock_net((struct sock *)msk), net)) + goto err_put; + + subflow_req->remote_nonce = e->remote_nonce; + subflow_req->local_nonce = e->local_nonce; + subflow_req->backup = e->backup; + subflow_req->remote_id = e->join_id; + subflow_req->token = e->token; + subflow_req->msk = msk; + spin_unlock_bh(&join_entry_locks[i]); + return true; + +err_put: + spin_unlock_bh(&join_entry_locks[i]); + sock_put((struct sock *)msk); + return false; +} + +void __init mptcp_join_cookie_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(join_entry_locks); i++) + spin_lock_init(&join_entry_locks[i]); + + BUILD_BUG_ON(ARRAY_SIZE(join_entry_locks) != ARRAY_SIZE(join_entries)); +} -- cgit From 48040793fa6003d211f021c6ad273477bcd90d91 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 30 Jul 2020 15:44:40 -0700 Subject: tcp: add earliest departure time to SCM_TIMESTAMPING_OPT_STATS This change adds TCP_NLA_EDT to SCM_TIMESTAMPING_OPT_STATS that reports the earliest departure time(EDT) of the timestamped skb. By tracking EDT values of the skb from different timestamps, we can observe when and how much the value changed. This allows to measure the precise delay injected on the sender host e.g. by a bpf-base throttler. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- net/ipv4/tcp.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8afefe6f6b6..4e2edfbe0e19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4692,7 +4692,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb); opt_stats = true; } else #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4afec552f211..c06d2bfd2ec4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3501,10 +3501,12 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ + nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ 0; } -struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) +struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, + const struct sk_buff *orig_skb) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; @@ -3558,6 +3560,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, max_t(int, 0, tp->write_seq - tp->snd_nxt)); + nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, + TCP_NLA_PAD); return stats; } -- cgit From 829eb208e80d6db95c0201cb8fa00c2f9ad87faf Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 31 Jul 2020 17:34:01 -0700 Subject: rtnetlink: add support for protodown reason netdev protodown is a mechanism that allows protocols to hold an interface down. It was initially introduced in the kernel to hold links down by a multihoming protocol. There was also an attempt to introduce protodown reason at the time but was rejected. protodown and protodown reason is supported by almost every switching and routing platform. It was ok for a while to live without a protodown reason. But, its become more critical now given more than one protocol may need to keep a link down on a system at the same time. eg: vrrp peer node, port security, multihoming protocol. Its common for Network operators and protocol developers to look for such a reason on a networking box (Its also known as errDisable by most networking operators) This patch adds support for link protodown reason attribute. There are two ways to maintain protodown reasons. (a) enumerate every possible reason code in kernel - A protocol developer has to make a request and have that appear in a certain kernel version (b) provide the bits in the kernel, and allow user-space (sysadmin or NOS distributions) to manage the bit-to-reasonname map. - This makes extending reason codes easier (kind of like the iproute2 table to vrf-name map /etc/iproute2/rt_tables.d/) This patch takes approach (b). a few things about the patch: - It treats the protodown reason bits as counter to indicate active protodown users - Since protodown attribute is already an exposed UAPI, the reason is not enforced on a protodown set. Its a no-op if not used. the patch follows the below algorithm: - presence of reason bits set indicates protodown is in use - user can set protodown and protodown reason in a single or multiple setlink operations - setlink operation to clear protodown, will return -EBUSY if there are active protodown reason bits - reason is not included in link dumps if not used example with patched iproute2: $cat /etc/iproute2/protodown_reasons.d/r.conf 0 mlag 1 evpn 2 vrrp 3 psecurity $ip link set dev vxlan0 protodown on protodown_reason vrrp on $ip link set dev vxlan0 protodown_reason mlag on $ip link show 14: vxlan0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether f6:06:be:17:91:e7 brd ff:ff:ff:ff:ff:ff protodown on $ip link set dev vxlan0 protodown_reason mlag off $ip link set dev vxlan0 protodown off protodown_reason vrrp off Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/dev.c | 25 ++++++++++++ net/core/rtnetlink.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 133 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 38a6371d9bc5..f7ef0f5c5569 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8715,6 +8715,31 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down_generic); +/** + * dev_change_proto_down_reason - proto down reason + * + * @dev: device + * @mask: proto down mask + * @value: proto down value + */ +void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, + u32 value) +{ + int b; + + if (!mask) { + dev->proto_down_reason = value; + } else { + for_each_set_bit(b, &mask, 32) { + if (value & (1 << b)) + dev->proto_down_reason |= BIT(b); + else + dev->proto_down_reason &= ~BIT(b); + } + } +} +EXPORT_SYMBOL(dev_change_proto_down_reason); + u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, enum bpf_netdev_command cmd) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 85a4b0101f76..a54c3e0f2ee1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1000,6 +1000,16 @@ static size_t rtnl_prop_list_size(const struct net_device *dev) return size; } +static size_t rtnl_proto_down_size(const struct net_device *dev) +{ + size_t size = nla_total_size(1); + + if (dev->proto_down_reason) + size += nla_total_size(0) + nla_total_size(4); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1041,7 +1051,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(4) /* IFLA_NEW_IFINDEX */ - + nla_total_size(1) /* IFLA_PROTO_DOWN */ + + rtnl_proto_down_size(dev) /* proto down */ + nla_total_size(4) /* IFLA_TARGET_NETNSID */ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */ @@ -1658,6 +1668,35 @@ nest_cancel: return ret; } +static int rtnl_fill_proto_down(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *pr; + u32 preason; + + if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) + goto nla_put_failure; + + preason = dev->proto_down_reason; + if (!preason) + return 0; + + pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON); + if (!pr) + return -EMSGSIZE; + + if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) { + nla_nest_cancel(skb, pr); + goto nla_put_failure; + } + + nla_nest_end(skb, pr); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1708,13 +1747,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_down_count)) || - nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) || nla_put_u32(skb, IFLA_CARRIER_UP_COUNT, atomic_read(&dev->carrier_up_count)) || nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT, atomic_read(&dev->carrier_down_count))) goto nla_put_failure; + if (rtnl_fill_proto_down(skb, dev)) + goto nla_put_failure; + if (event != IFLA_EVENT_NONE) { if (nla_put_u32(skb, IFLA_EVENT, event)) goto nla_put_failure; @@ -1834,6 +1875,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_ALT_IFNAME] = { .type = NLA_STRING, .len = ALTIFNAMSIZ - 1 }, [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, + [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2483,6 +2525,67 @@ static int do_set_master(struct net_device *dev, int ifindex, return 0; } +static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = { + [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 }, + [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 }, +}; + +static int do_set_proto_down(struct net_device *dev, + struct nlattr *nl_proto_down, + struct nlattr *nl_proto_down_reason, + struct netlink_ext_ack *extack) +{ + struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; + const struct net_device_ops *ops = dev->netdev_ops; + unsigned long mask = 0; + u32 value; + bool proto_down; + int err; + + if (!ops->ndo_change_proto_down) { + NL_SET_ERR_MSG(extack, "Protodown not supported by device"); + return -EOPNOTSUPP; + } + + if (nl_proto_down_reason) { + err = nla_parse_nested_deprecated(pdreason, + IFLA_PROTO_DOWN_REASON_MAX, + nl_proto_down_reason, + ifla_proto_down_reason_policy, + NULL); + if (err < 0) + return err; + + if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) { + NL_SET_ERR_MSG(extack, "Invalid protodown reason value"); + return -EINVAL; + } + + value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]); + + if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) + mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); + + dev_change_proto_down_reason(dev, mask, value); + } + + if (nl_proto_down) { + proto_down = nla_get_u8(nl_proto_down); + + /* Dont turn off protodown if there are active reasons */ + if (!proto_down && dev->proto_down_reason) { + NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); + return -EBUSY; + } + err = dev_change_proto_down(dev, + proto_down); + if (err) + return err; + } + + return 0; +} + #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. */ #define DO_SETLINK_NOTIFY 0x03 @@ -2771,9 +2874,9 @@ static int do_setlink(const struct sk_buff *skb, } err = 0; - if (tb[IFLA_PROTO_DOWN]) { - err = dev_change_proto_down(dev, - nla_get_u8(tb[IFLA_PROTO_DOWN])); + if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) { + err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN], + tb[IFLA_PROTO_DOWN_REASON], extack); if (err) goto errout; status |= DO_SETLINK_NOTIFY; -- cgit From 0e8642cf369a37b718c15effa6ffd52c00fd7d15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jul 2020 19:09:29 -0700 Subject: tcp: fix build fong CONFIG_MPTCP=n Fixes these errors: net/ipv4/syncookies.c: In function 'tcp_get_cookie_sock': net/ipv4/syncookies.c:216:19: error: 'struct tcp_request_sock' has no member named 'drop_req' 216 | if (tcp_rsk(req)->drop_req) { | ^~ net/ipv4/syncookies.c: In function 'cookie_tcp_reqsk_alloc': net/ipv4/syncookies.c:289:27: warning: unused variable 'treq' [-Wunused-variable] 289 | struct tcp_request_sock *treq; | ^~~~ make[3]: *** [scripts/Makefile.build:280: net/ipv4/syncookies.o] Error 1 make[3]: *** Waiting for unfinished jobs.... Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use") Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 11b20474be83..f0794f0232ba 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); - if (tcp_rsk(req)->drop_req) { + if (rsk_drop_req(req)) { refcount_set(&req->rsk_refcnt, 2); return child; } @@ -286,10 +286,11 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb) { - struct tcp_request_sock *treq; struct request_sock *req; #ifdef CONFIG_MPTCP + struct tcp_request_sock *treq; + if (sk_is_mptcp(sk)) ops = &mptcp_subflow_request_sock_ops; #endif -- cgit From 8b66a6fd34f519f4ad42c4ab0152c3c0782a7dbd Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Fri, 31 Jul 2020 20:01:10 -0700 Subject: fib: fix another fib_rules_ops indirect call wrapper problem It turns out that on commit 41d707b7332f ("fib: fix fib_rules_ops indirect calls wrappers") I forgot to include the case when CONFIG_IP_MULTIPLE_TABLES is not set. Fixes: 41d707b7332f ("fib: fix fib_rules_ops indirect calls wrappers") Reported-by: Randy Dunlap Cc: Stephen Rothwell Signed-off-by: Brian Vazquez Acked-by: Randy Dunlap # build-tested Signed-off-by: David S. Miller --- net/core/fib_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index fce645f6b9b1..a7a3f500a857 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -17,10 +17,16 @@ #include #ifdef CONFIG_IPV6_MULTIPLE_TABLES +#ifdef CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) \ INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__) #else +#define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) +#endif +#elif CONFIG_IP_MULTIPLE_TABLES #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__) #endif static const struct fib_kuid_range fib_kuid_range_unset = { -- cgit From 7126bd5c8bcbc015cf89864cf71d750e8f33f924 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 1 Aug 2020 16:39:59 +0200 Subject: mptcp: fix syncookie build error on UP kernel test robot says: net/mptcp/syncookies.c: In function 'mptcp_join_cookie_init': include/linux/kernel.h:47:38: warning: division by zero [-Wdiv-by-zero] #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) I forgot that spinock_t size is 0 on UP, so ARRAY_SIZE cannot be used. Fixes: 9466a1ccebbe54 ("mptcp: enable JOIN requests even if cookies are in use") Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/syncookies.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c index 6eb992789b50..abe0fd099746 100644 --- a/net/mptcp/syncookies.c +++ b/net/mptcp/syncookies.c @@ -125,8 +125,6 @@ void __init mptcp_join_cookie_init(void) { int i; - for (i = 0; i < ARRAY_SIZE(join_entry_locks); i++) + for (i = 0; i < COOKIE_JOIN_SLOTS; i++) spin_lock_init(&join_entry_locks[i]); - - BUILD_BUG_ON(ARRAY_SIZE(join_entry_locks) != ARRAY_SIZE(join_entries)); } -- cgit From 83d9dcba06c53e24e7dc47d51607d5cf9b50e5f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 2 Aug 2020 02:56:37 +0200 Subject: netfilter: nf_tables: extended netlink error reporting for expressions This patch extends 36dd1bcc07e5 ("netfilter: nf_tables: initial support for extended ACK reporting") to include netlink extended error reporting for expressions. This allows userspace to identify what rule expression is triggering the error. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0d96e4eb754d..fac552b0179f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2509,6 +2509,7 @@ nla_put_failure: struct nft_expr_info { const struct nft_expr_ops *ops; + const struct nlattr *attr; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; }; @@ -2556,7 +2557,9 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, } else ops = type->ops; + info->attr = nla; info->ops = ops; + return 0; err1: @@ -3214,8 +3217,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, expr = nft_expr_first(rule); for (i = 0; i < n; i++) { err = nf_tables_newexpr(&ctx, &info[i], expr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, info[i].attr); goto err2; + } if (info[i].ops->validate) nft_validate_state_update(net, NFT_VALIDATE_NEED); -- cgit From 78470d9d0d9f2f8d16f28382a4071568e839c0d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 2 Aug 2020 03:27:03 +0200 Subject: netfilter: nft_meta: fix iifgroup matching iifgroup matching erroneously checks the output interface. Fixes: 8724e819cc9a ("netfilter: nft_meta: move all interface related keys to helper") Reported-by: Demi M. Obenour Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 951b6e87ed5d..7bc6537f3ccb 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -253,7 +253,7 @@ static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest, return false; break; case NFT_META_IIFGROUP: - if (!nft_meta_store_ifgroup(dest, nft_out(pkt))) + if (!nft_meta_store_ifgroup(dest, nft_in(pkt))) return false; break; case NFT_META_OIFGROUP: -- cgit From 73b11c2ab072d5b0599d1e12cc126f55ee306daf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 31 Jul 2020 11:28:26 -0700 Subject: bpf: Add support for forced LINK_DETACH command Add LINK_DETACH command to force-detach bpf_link without destroying it. It has the same behavior as auto-detaching of bpf_link due to cgroup dying for bpf_cgroup_link or net_device being destroyed for bpf_xdp_link. In such case, bpf_link is still a valid kernel object, but is defuncts and doesn't hold BPF program attached to corresponding BPF hook. This functionality allows users with enough access rights to manually force-detach attached bpf_link without killing respective owner process. This patch implements LINK_DETACH for cgroup, xdp, and netns links, mostly re-using existing link release handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200731182830.286260-2-andriin@fb.com --- net/core/dev.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a2a57988880a..c8b911b10187 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8979,12 +8979,20 @@ static void bpf_xdp_link_release(struct bpf_link *link) /* if racing with net_device's tear down, xdp_link->dev might be * already NULL, in which case link was already auto-detached */ - if (xdp_link->dev) + if (xdp_link->dev) { WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link)); + xdp_link->dev = NULL; + } rtnl_unlock(); } +static int bpf_xdp_link_detach(struct bpf_link *link) +{ + bpf_xdp_link_release(link); + return 0; +} + static void bpf_xdp_link_dealloc(struct bpf_link *link) { struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link); @@ -9066,6 +9074,7 @@ out_unlock: static const struct bpf_link_ops bpf_xdp_link_lops = { .release = bpf_xdp_link_release, .dealloc = bpf_xdp_link_dealloc, + .detach = bpf_xdp_link_detach, .show_fdinfo = bpf_xdp_link_show_fdinfo, .fill_link_info = bpf_xdp_link_fill_link_info, .update_prog = bpf_xdp_link_update, -- cgit From 77a92189ecfd061616ad531d386639aab7baaad9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 2 Aug 2020 03:05:25 +0200 Subject: netfilter: nf_tables: report EEXIST on overlaps Replace EBUSY by EEXIST in the following cases: - If the user adds a chain with a different configuration such as different type, hook and priority. - If the user adds a non-base chain that clashes with an existing basechain. - If the user adds a { key : value } mapping element and the key exists but the value differs. - If the device already belongs to an existing flowtable. User describe that this error reporting is confusing: - https://bugzilla.netfilter.org/show_bug.cgi?id=1176 - https://bugzilla.netfilter.org/show_bug.cgi?id=1413 Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fac552b0179f..6571789989bc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2097,7 +2097,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nla[NFTA_CHAIN_HOOK]) { if (!nft_is_base_chain(chain)) - return -EBUSY; + return -EEXIST; err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, false); @@ -2107,21 +2107,21 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, basechain = nft_base_chain(chain); if (basechain->type != hook.type) { nft_chain_release_hook(&hook); - return -EBUSY; + return -EEXIST; } if (ctx->family == NFPROTO_NETDEV) { if (!nft_hook_list_equal(&basechain->hook_list, &hook.list)) { nft_chain_release_hook(&hook); - return -EBUSY; + return -EEXIST; } } else { ops = &basechain->ops; if (ops->hooknum != hook.num || ops->priority != hook.priority) { nft_chain_release_hook(&hook); - return -EBUSY; + return -EEXIST; } } nft_chain_release_hook(&hook); @@ -5262,10 +5262,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ - nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) { - err = -EBUSY; + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) goto err_element_clash; - } if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), @@ -5273,7 +5271,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) - err = -EBUSY; + goto err_element_clash; else if (!(nlmsg_flags & NLM_F_EXCL)) err = 0; } else if (err == -ENOTEMPTY) { @@ -6423,7 +6421,7 @@ static int nft_register_flowtable_net_hooks(struct net *net, list_for_each_entry(hook2, &ft->hook_list, list) { if (hook->ops.dev == hook2->ops.dev && hook->ops.pf == hook2->ops.pf) { - err = -EBUSY; + err = -EEXIST; goto err_unregister_net_hooks; } } -- cgit From 4e56cde15f7d68cf86ff8efff8504497de152475 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 31 Jul 2020 21:38:30 +0300 Subject: mac80211: Handle special status codes in SAE commit SAE authentication has been extended with H2E (IEEE 802.11 REVmd) and PK (WFA) options. Those extensions use special status code values in the SAE commit messages (Authentication frame with transaction sequence number 1) to identify which extension is in use. mac80211 was interpreting those new values as the AP denying authentication and that resulted in failure to complete SAE authentication in some cases. Fix this by adding exceptions for the new status code values 126 and 127. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200731183830.18735-1-jouni@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 839d0367446c..8b7ca8ddfe20 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2988,7 +2988,10 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); if (auth_alg == WLAN_AUTH_SAE && - status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED) + (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED || + (auth_transaction == 1 && + (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT || + status_code == WLAN_STATUS_SAE_PK)))) return; sdata_info(sdata, "%pM denied authentication (status %d)\n", -- cgit From 6628d00116b37bd5ef44cc69f5a8df6b0d4e14b7 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 3 Aug 2020 10:45:40 +0200 Subject: mac8211: fix struct initialisation Sparse showed up with the following error. net/mac80211/agg-rx.c:480:43: warning: Using plain integer as NULL pointer Fixes: 2ab45876756f (mac80211: add support for the ADDBA extension element) Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20200803084540.179908-1-john@phrozen.org Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 7f245e9f114c..313ba97acae3 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -477,7 +477,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; - struct ieee802_11_elems elems = { 0 }; + struct ieee802_11_elems elems = { }; u8 dialog_token; int ies_len; -- cgit From 47d76e31908d8fdb06c6c75e4f3c4d4a08c9c850 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 1 Aug 2020 17:12:38 +0800 Subject: mac80211: use eth_zero_addr() to clear mac address Use eth_zero_addr() to clear mac address instead of memset(). Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/1596273158-24183-1-git-send-email-linmiaohe@huawei.com Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 1b4709694d2a..50ab5b9d8eab 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -22,7 +22,8 @@ #define LOCAL_PR_ARG __entry->wiphy_name #define STA_ENTRY __array(char, sta_addr, ETH_ALEN) -#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : memset(__entry->sta_addr, 0, ETH_ALEN)) +#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : \ + eth_zero_addr(__entry->sta_addr)) #define STA_NAMED_ASSIGN(s) memcpy(__entry->sta_addr, (s)->addr, ETH_ALEN) #define STA_PR_FMT " sta:%pM" #define STA_PR_ARG __entry->sta_addr -- cgit From 3b1648f10961ce41bb709dfcadfdb9836c9a8ab8 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 1 Aug 2020 17:15:49 +0800 Subject: nl80211: use eth_zero_addr() to clear mac address Use eth_zero_addr() to clear mac address instead of memset(). Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/1596273349-24333-1-git-send-email-linmiaohe@huawei.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5c57ac2ee75b..814e23d3ce7c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10400,8 +10400,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) memcpy(dev->ieee80211_ptr->disconnect_bssid, connect.bssid, ETH_ALEN); else - memset(dev->ieee80211_ptr->disconnect_bssid, - 0, ETH_ALEN); + eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid); } wdev_unlock(dev->ieee80211_ptr); -- cgit From 5981fe5b0529ba25d95f37d7faa434183ad618c5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Aug 2020 11:02:10 +0200 Subject: mac80211: fix misplaced while instead of if This never was intended to be a 'while' loop, it should've just been an 'if' instead of 'while'. Fix this. I noticed this while applying another patch from Ben that intended to fix a busy loop at this spot. Cc: stable@vger.kernel.org Fixes: b16798f5b907 ("mac80211: mark station unauthorized before key removal") Reported-by: Ben Greear Link: https://lore.kernel.org/r/20200803110209.253009ae41ff.I3522aad099392b31d5cf2dcca34cbac7e5832dde@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1dc747de30cc..f2840d1d95cf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1050,7 +1050,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta) might_sleep(); lockdep_assert_held(&local->sta_mtx); - while (sta->sta_state == IEEE80211_STA_AUTHORIZED) { + if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); WARN_ON_ONCE(ret); } -- cgit From 73f9407b3eb893bc8a82293cc8d4dfa3db079c0b Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 3 Aug 2020 10:33:04 +0300 Subject: netfilter: conntrack: Move nf_ct_offload_timeout to header file To be used by callers from other modules. [ Rename DAY to NF_CT_DAY to avoid possible symbol name pollution issue --Pablo ] Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f33d72c5b06e..c3cea50d1bcb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1344,18 +1344,6 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) return false; } -#define DAY (86400 * HZ) - -/* Set an arbitrary timeout large enough not to ever expire, this save - * us a check for the IPS_OFFLOAD_BIT from the packet path via - * nf_ct_is_expired(). - */ -static void nf_ct_offload_timeout(struct nf_conn *ct) -{ - if (nf_ct_expires(ct) < DAY / 2) - ct->timeout = nfct_time_stamp + DAY; -} - static void gc_worker(struct work_struct *work) { unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); -- cgit From 4203b19c27967d9eff6928f6a733f81892ffc592 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 3 Aug 2020 10:33:05 +0300 Subject: netfilter: flowtable: Set offload timeout when adding flow On heavily loaded systems the GC can take time to go over all existing conns and reset their timeout. At that time other calls like from nf_conntrack_in() can call of nf_ct_is_expired() and see the conn as expired. To fix this when we set the offload bit we should also reset the timeout instead of counting on GC to finish first iteration over all conns before the initial timeout. Fixes: 90964016e5d3 ("netfilter: nf_conntrack: add IPS_OFFLOAD status bit") Signed-off-by: Roi Dayan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b1eb5272b379..4f7a567c536e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -243,6 +243,8 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } + nf_ct_offload_timeout(flow->ct); + if (nf_flowtable_hw_offload(flow_table)) { __set_bit(NF_FLOW_HW, &flow->flags); nf_flow_offload_add(flow_table, flow); -- cgit From 0b91111fb1a164fedbb68a9263afafd10ffa6ec3 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 18 Jun 2020 11:17:42 +0200 Subject: mac80211: Do not report beacon loss if beacon filtering enabled mac80211.h says: Beacon filter support is advertised with the IEEE80211_VIF_BEACON_FILTER interface capability. The driver needs to enable beacon filter support whenever power save is enabled, that is IEEE80211_CONF_PS is set. When power save is enabled, the stack will not check for beacon loss and the driver needs to notify about loss of beacons with ieee80211_beacon_loss(). Some controllers may want to dynamically enable the beacon filter capabilities on power save entry (CONF_PS) and disable it on exit. This is the case for the wcn36xx driver which only supports beacon filtering in PS mode (no CONNECTION_MONITOR support). When the mac80211 beacon monitor timer expires, the beacon filter flag must be checked again in case it as been changed in between (e.g. vif moved to PS mode). Signed-off-by: Loic Poulain Link: https://lore.kernel.org/r/1592471863-31402-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8b7ca8ddfe20..ac870309b911 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4563,6 +4563,9 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) return; + if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) + return; + sdata->u.mgd.connection_loss = false; ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_connection_loss_work); -- cgit From 966e50597666d530b69de2abb9c83ff0a9bd3ee6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Jul 2020 14:47:58 -0700 Subject: udp_tunnel: add the ability to hard-code IANA VXLAN mlx5 has the IANA VXLAN port (4789) hard coded by the device, instead of being added dynamically when tunnels are created. To support this add a workaround flag to struct udp_tunnel_nic_info. Skipping updates for the port is fairly trivial, dumping the hard coded port via ethtool requires some code duplication. The port is not a part of any real table, we dump it in a special table which has no tunnel types supported and only one entry. This is the last known workaround / hack needed to convert all drivers to the new infra. Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- net/ethtool/tunnels.c | 69 +++++++++++++++++++++++++++++++++++++++++------ net/ipv4/udp_tunnel_nic.c | 7 +++++ 2 files changed, 68 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index 6b89255f1231..84f23289475b 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -2,6 +2,7 @@ #include #include +#include #include "bitset.h" #include "common.h" @@ -18,6 +19,20 @@ static_assert(ETHTOOL_UDP_TUNNEL_TYPE_GENEVE == ilog2(UDP_TUNNEL_TYPE_GENEVE)); static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE == ilog2(UDP_TUNNEL_TYPE_VXLAN_GPE)); +static ssize_t ethnl_udp_table_reply_size(unsigned int types, bool compact) +{ + ssize_t size; + + size = ethnl_bitset32_size(&types, NULL, __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact); + if (size < 0) + return size; + + return size + + nla_total_size(0) + /* _UDP_TABLE */ + nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */ +} + static ssize_t ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, struct netlink_ext_ack *extack) @@ -25,8 +40,8 @@ ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct udp_tunnel_nic_info *info; unsigned int i; + ssize_t ret; size_t size; - int ret; info = req_base->dev->udp_tunnel_nic_info; if (!info) { @@ -39,13 +54,10 @@ ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { if (!info->tables[i].n_entries) - return size; + break; - size += nla_total_size(0); /* _UDP_TABLE */ - size += nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */ - ret = ethnl_bitset32_size(&info->tables[i].tunnel_types, NULL, - __ETHTOOL_UDP_TUNNEL_TYPE_CNT, - udp_tunnel_type_names, compact); + ret = ethnl_udp_table_reply_size(info->tables[i].tunnel_types, + compact); if (ret < 0) return ret; size += ret; @@ -53,6 +65,17 @@ ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, size += udp_tunnel_nic_dump_size(req_base->dev, i); } + if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) { + ret = ethnl_udp_table_reply_size(0, compact); + if (ret < 0) + return ret; + size += ret; + + size += nla_total_size(0) + /* _TABLE_ENTRY */ + nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */ + nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */ + } + return size; } @@ -62,7 +85,7 @@ ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct udp_tunnel_nic_info *info; - struct nlattr *ports, *table; + struct nlattr *ports, *table, *entry; unsigned int i; info = req_base->dev->udp_tunnel_nic_info; @@ -97,10 +120,40 @@ ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, nla_nest_end(skb, table); } + if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) { + u32 zero = 0; + + table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE); + if (!table) + goto err_cancel_ports; + + if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, 1)) + goto err_cancel_table; + + if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + &zero, NULL, + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + udp_tunnel_type_names, compact)) + goto err_cancel_table; + + entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); + + if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + htons(IANA_VXLAN_UDP_PORT)) || + nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, + ilog2(UDP_TUNNEL_TYPE_VXLAN))) + goto err_cancel_entry; + + nla_nest_end(skb, entry); + nla_nest_end(skb, table); + } + nla_nest_end(skb, ports); return 0; +err_cancel_entry: + nla_nest_cancel(skb, entry); err_cancel_table: nla_nest_cancel(skb, table); err_cancel_ports: diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index f0dbd9905a53..69962165c0e8 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -7,6 +7,7 @@ #include #include #include +#include enum udp_tunnel_nic_table_entry_flags { UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0), @@ -504,6 +505,12 @@ __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) return; if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) return; + if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN && + ti->port == htons(IANA_VXLAN_UDP_PORT)) { + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + netdev_warn(dev, "device assumes port 4789 will be used by vxlan tunnels\n"); + return; + } if (!udp_tunnel_nic_is_capable(dev, utn, ti)) return; -- cgit From fa5cb548ced61b9d3095f32f8a7e427a248c65ee Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Mon, 3 Aug 2020 12:05:44 +0300 Subject: bpf: Setup socket family and addresses in bpf_prog_test_run_skb Now it's impossible to test all branches of cgroup_skb bpf program which accesses skb->family and skb->{local,remote}_ip{4,6} fields because they are zeroed during socket allocation. This commit fills socket family and addresses from related fields in constructed skb. Signed-off-by: Dmitry Yakunin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200803090545.82046-2-zeil@yandex-team.ru --- net/bpf/test_run.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b03c469cd01f..736a5964fa95 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -449,6 +449,27 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); skb_reset_network_header(skb); + switch (skb->protocol) { + case htons(ETH_P_IP): + sk->sk_family = AF_INET; + if (sizeof(struct iphdr) <= skb_headlen(skb)) { + sk->sk_rcv_saddr = ip_hdr(skb)->saddr; + sk->sk_daddr = ip_hdr(skb)->daddr; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + sk->sk_family = AF_INET6; + if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) { + sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr; + sk->sk_v6_daddr = ipv6_hdr(skb)->daddr; + } + break; +#endif + default: + break; + } + if (is_l2) __skb_push(skb, hh_len); if (is_direct_pkt_access) -- cgit From 21594c44083c375697d418729c4b2e4522cf9f70 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Mon, 3 Aug 2020 12:05:45 +0300 Subject: bpf: Allow to specify ifindex for skb in bpf_prog_test_run_skb Now skb->dev is unconditionally set to the loopback device in current net namespace. But if we want to test bpf program which contains code branch based on ifindex condition (eg filters out localhost packets) it is useful to allow specifying of ifindex from userspace. This patch adds such option through ctx_in (__sk_buff) parameter. Signed-off-by: Dmitry Yakunin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200803090545.82046-3-zeil@yandex-team.ru --- net/bpf/test_run.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 736a5964fa95..99eb8c6c0fbc 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -327,6 +327,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* priority is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), + offsetof(struct __sk_buff, ifindex))) + return -EINVAL; + + /* ifindex is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex), offsetof(struct __sk_buff, cb))) return -EINVAL; @@ -381,6 +387,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) __skb->mark = skb->mark; __skb->priority = skb->priority; + __skb->ifindex = skb->dev->ifindex; __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); __skb->wire_len = cb->pkt_len; @@ -391,6 +398,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { bool is_l2 = false, is_direct_pkt_access = false; + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = net->loopback_dev; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; @@ -432,7 +441,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, kfree(ctx); return -ENOMEM; } - sock_net_set(sk, current->nsproxy->net_ns); + sock_net_set(sk, net); sock_init_data(NULL, sk); skb = build_skb(data, 0); @@ -446,7 +455,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); - skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + if (ctx && ctx->ifindex > 1) { + dev = dev_get_by_index(net, ctx->ifindex); + if (!dev) { + ret = -ENODEV; + goto out; + } + } + skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); switch (skb->protocol) { @@ -502,6 +518,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ret = bpf_ctx_finish(kattr, uattr, ctx, sizeof(struct __sk_buff)); out: + if (dev && dev != net->loopback_dev) + dev_put(dev); kfree_skb(skb); bpf_sk_storage_free(sk); kfree(sk); -- cgit From 038ebb1a713d114d54dbf14868a73181c0c92758 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 31 Jul 2020 10:45:01 +0800 Subject: net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct When openvswitch conntrack offload with act_ct action. Fragment packets defrag in the ingress tc act_ct action and miss the next chain. Then the packet pass to the openvswitch datapath without the mru. The over mtu packet will be dropped in output action in openvswitch for over mtu. "kernel: net2: dropped over-mtu packet: 1528 > 1500" This patch add mru in the tc_skb_ext for adefrag and miss next chain situation. And also add mru in the qdisc_skb_cb. The act_ct set the mru to the qdisc_skb_cb when the packet defrag. And When the chain miss, The mru is set to tc_skb_ext which can be got by ovs datapath. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: wenxu Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 1 + net/sched/act_ct.c | 8 ++++++-- net/sched/cls_api.c | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d375e74b607..03942c30d83e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -890,6 +890,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (static_branch_unlikely(&tc_recirc_sharing_support)) { tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; + OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; } else { key->recirc_id = 0; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 97e27946897f..e6ad42b11835 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -706,8 +706,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err && err != -EINPROGRESS) goto out_free; - if (!err) + if (!err) { *defrag = true; + cb.mru = IPCB(skb)->frag_max_size; + } } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -717,8 +719,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err && err != -EINPROGRESS) goto out_free; - if (!err) + if (!err) { *defrag = true; + cb.mru = IP6CB(skb)->frag_max_size; + } #else err = -EOPNOTSUPP; goto out_free; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b8623b3b24f..41a55c6cbeb8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1629,6 +1629,7 @@ int tcf_classify_ingress(struct sk_buff *skb, if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; + ext->mru = qdisc_skb_cb(skb)->mru; } return ret; -- cgit From 9aba6c5b49254d5bee927d81593ed4429e91d4ae Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 31 Jul 2020 00:48:38 -0400 Subject: openvswitch: Prevent kernel-infoleak in ovs_ct_put_key() ovs_ct_put_key() is potentially copying uninitialized kernel stack memory into socket buffers, since the compiler may leave a 3-byte hole at the end of `struct ovs_key_ct_tuple_ipv4` and `struct ovs_key_ct_tuple_ipv6`. Fix it by initializing `orig` with memset(). Fixes: 9dd7f8907c37 ("openvswitch: Add original direction conntrack tuple to sw_flow_key.") Suggested-by: Dan Carpenter Signed-off-by: Peilin Ye Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 4340f25fe390..98d393e70de3 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -276,10 +276,6 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) ovs_ct_update_key(skb, NULL, key, false, false); } -#define IN6_ADDR_INITIALIZER(ADDR) \ - { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ - (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } - int ovs_ct_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { @@ -301,24 +297,30 @@ int ovs_ct_put_key(const struct sw_flow_key *swkey, if (swkey->ct_orig_proto) { if (swkey->eth.type == htons(ETH_P_IP)) { - struct ovs_key_ct_tuple_ipv4 orig = { - output->ipv4.ct_orig.src, - output->ipv4.ct_orig.dst, - output->ct.orig_tp.src, - output->ct.orig_tp.dst, - output->ct_orig_proto, - }; + struct ovs_key_ct_tuple_ipv4 orig; + + memset(&orig, 0, sizeof(orig)); + orig.ipv4_src = output->ipv4.ct_orig.src; + orig.ipv4_dst = output->ipv4.ct_orig.dst; + orig.src_port = output->ct.orig_tp.src; + orig.dst_port = output->ct.orig_tp.dst; + orig.ipv4_proto = output->ct_orig_proto; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, sizeof(orig), &orig)) return -EMSGSIZE; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - struct ovs_key_ct_tuple_ipv6 orig = { - IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), - IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), - output->ct.orig_tp.src, - output->ct.orig_tp.dst, - output->ct_orig_proto, - }; + struct ovs_key_ct_tuple_ipv6 orig; + + memset(&orig, 0, sizeof(orig)); + memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32, + sizeof(orig.ipv6_src)); + memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32, + sizeof(orig.ipv6_dst)); + orig.src_port = output->ct.orig_tp.src; + orig.dst_port = output->ct.orig_tp.dst; + orig.ipv6_proto = output->ct_orig_proto; + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, sizeof(orig), &orig)) return -EMSGSIZE; -- cgit From 71fed0bc8665969f4681fc5772e5df42bfcc472e Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Fri, 31 Jul 2020 00:58:44 -0400 Subject: ethtool: ethnl_set_linkmodes: remove redundant null check info cannot be NULL here since its being accessed earlier in the function: nlmsg_parse(info->nlhdr...). Remove this redundant NULL check. Signed-off-by: Gaurav Singh Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/linkmodes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 317a93129551..7044a2853886 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -421,8 +421,7 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) ret = __ethtool_get_link_ksettings(dev, &ksettings); if (ret < 0) { - if (info) - GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); + GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); goto out_ops; } -- cgit From 9d2f627b7ec9d5d3246b6cec17f290ee6778c83b Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:20:56 +0200 Subject: net: openvswitch: add masks cache hit counter Add a counter that counts the number of masks cache hits, and export it through the megaflow netlink statistics. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 5 ++++- net/openvswitch/datapath.h | 3 +++ net/openvswitch/flow_table.c | 19 ++++++++++++++----- net/openvswitch/flow_table.h | 3 ++- 4 files changed, 23 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6b6822f82f70..f45fee760504 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -225,13 +225,14 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + u32 n_cache_hit; int error; stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), - &n_mask_hit); + &n_mask_hit, &n_cache_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; @@ -262,6 +263,7 @@ out: u64_stats_update_begin(&stats->syncp); (*stats_counter)++; stats->n_mask_hit += n_mask_hit; + stats->n_cache_hit += n_cache_hit; u64_stats_update_end(&stats->syncp); } @@ -699,6 +701,7 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; mega_stats->n_mask_hit += local_stats.n_mask_hit; + mega_stats->n_cache_hit += local_stats.n_cache_hit; } } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 24fcec22fde2..38f7d3e66ca6 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -38,12 +38,15 @@ * @n_mask_hit: Number of masks looked up for flow match. * @n_mask_hit / (@n_hit + @n_missed) will be the average masks looked * up per packet. + * @n_cache_hit: The number of received packets that had their mask found using + * the mask cache. */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; u64 n_mask_hit; + u64 n_cache_hit; struct u64_stats_sync syncp; }; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index af22c9ee28dd..a5912ea05352 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -667,6 +667,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct mask_array *ma, const struct sw_flow_key *key, u32 *n_mask_hit, + u32 *n_cache_hit, u32 *index) { u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr); @@ -682,6 +683,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u64_stats_update_begin(&ma->syncp); usage_counters[*index]++; u64_stats_update_end(&ma->syncp); + (*n_cache_hit)++; return flow; } } @@ -719,7 +721,8 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, const struct sw_flow_key *key, u32 skb_hash, - u32 *n_mask_hit) + u32 *n_mask_hit, + u32 *n_cache_hit) { struct mask_array *ma = rcu_dereference(tbl->mask_array); struct table_instance *ti = rcu_dereference(tbl->ti); @@ -729,10 +732,13 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, int seg; *n_mask_hit = 0; + *n_cache_hit = 0; if (unlikely(!skb_hash)) { u32 mask_index = 0; + u32 cache = 0; - return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); + return flow_lookup(tbl, ti, ma, key, n_mask_hit, &cache, + &mask_index); } /* Pre and post recirulation flows usually have the same skb_hash @@ -753,7 +759,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, e = &entries[index]; if (e->skb_hash == skb_hash) { flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, - &e->mask_index); + n_cache_hit, &e->mask_index); if (!flow) e->skb_hash = 0; return flow; @@ -766,10 +772,12 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, } /* Cache miss, do full lookup. */ - flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index); + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, n_cache_hit, + &ce->mask_index); if (flow) ce->skb_hash = skb_hash; + *n_cache_hit = 0; return flow; } @@ -779,9 +787,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl, struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); u32 __always_unused n_mask_hit; + u32 __always_unused n_cache_hit; u32 index = 0; - return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index); + return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index); } struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 1f664b050e3b..325e939371d8 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -82,7 +82,8 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, const struct sw_flow_key *, u32 skb_hash, - u32 *n_mask_hit); + u32 *n_mask_hit, + u32 *n_cache_hit); struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, -- cgit From 9bf24f594c6acf676fb8c229f152c21bfb915ddb Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 31 Jul 2020 14:21:34 +0200 Subject: net: openvswitch: make masks cache size configurable This patch makes the masks cache size configurable, or with a size of 0, disable it. Reviewed-by: Paolo Abeni Reviewed-by: Tonghao Zhang Signed-off-by: Eelco Chaudron Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 17 ++++++++ net/openvswitch/flow_table.c | 101 +++++++++++++++++++++++++++++++++++++------ net/openvswitch/flow_table.h | 10 ++++- 3 files changed, 114 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f45fee760504..42f8cc70bb2c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void) msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats)); msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */ + msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */ return msgsize; } @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; + if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, + ovs_flow_tbl_masks_cache_size(&dp->table))) + goto nla_put_failure; + genlmsg_end(skb, ovs_header); return 0; @@ -1599,6 +1604,16 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) #endif } + if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { + int err; + u32 cache_size; + + cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); + err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size); + if (err) + return err; + } + dp->user_features = user_features; if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) @@ -1887,6 +1902,8 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, + [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0, + PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)), }; static const struct genl_ops dp_datapath_genl_ops[] = { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index a5912ea05352..6527d84c3ea6 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -38,8 +38,8 @@ #define MASK_ARRAY_SIZE_MIN 16 #define REHASH_INTERVAL (10 * 60 * HZ) +#define MC_DEFAULT_HASH_ENTRIES 256 #define MC_HASH_SHIFT 8 -#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT) #define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) static struct kmem_cache *flow_cache; @@ -341,15 +341,79 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) } } +static void __mask_cache_destroy(struct mask_cache *mc) +{ + free_percpu(mc->mask_cache); + kfree(mc); +} + +static void mask_cache_rcu_cb(struct rcu_head *rcu) +{ + struct mask_cache *mc = container_of(rcu, struct mask_cache, rcu); + + __mask_cache_destroy(mc); +} + +static struct mask_cache *tbl_mask_cache_alloc(u32 size) +{ + struct mask_cache_entry __percpu *cache = NULL; + struct mask_cache *new; + + /* Only allow size to be 0, or a power of 2, and does not exceed + * percpu allocation size. + */ + if ((!is_power_of_2(size) && size != 0) || + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->cache_size = size; + if (new->cache_size > 0) { + cache = __alloc_percpu(array_size(sizeof(struct mask_cache_entry), + new->cache_size), + __alignof__(struct mask_cache_entry)); + if (!cache) { + kfree(new); + return NULL; + } + } + + new->mask_cache = cache; + return new; +} +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) +{ + struct mask_cache *mc = rcu_dereference(table->mask_cache); + struct mask_cache *new; + + if (size == mc->cache_size) + return 0; + + if ((!is_power_of_2(size) && size != 0) || + (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE) + return -EINVAL; + + new = tbl_mask_cache_alloc(size); + if (!new) + return -ENOMEM; + + rcu_assign_pointer(table->mask_cache, new); + call_rcu(&mc->rcu, mask_cache_rcu_cb); + + return 0; +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; + struct mask_cache *mc; struct mask_array *ma; - table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * - MC_HASH_ENTRIES, - __alignof__(struct mask_cache_entry)); - if (!table->mask_cache) + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES); + if (!mc) return -ENOMEM; ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); @@ -367,6 +431,7 @@ int ovs_flow_tbl_init(struct flow_table *table) rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ufid_ti, ufid_ti); rcu_assign_pointer(table->mask_array, ma); + rcu_assign_pointer(table->mask_cache, mc); table->last_rehash = jiffies; table->count = 0; table->ufid_count = 0; @@ -377,7 +442,7 @@ free_ti: free_mask_array: __mask_array_destroy(ma); free_mask_cache: - free_percpu(table->mask_cache); + __mask_cache_destroy(mc); return -ENOMEM; } @@ -453,9 +518,11 @@ void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); + struct mask_cache *mc = rcu_dereference(table->mask_cache); + struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); - free_percpu(table->mask_cache); - call_rcu(&table->mask_array->rcu, mask_array_rcu_cb); + call_rcu(&mc->rcu, mask_cache_rcu_cb); + call_rcu(&ma->rcu, mask_array_rcu_cb); table_instance_destroy(table, ti, ufid_ti, false); } @@ -724,6 +791,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, u32 *n_mask_hit, u32 *n_cache_hit) { + struct mask_cache *mc = rcu_dereference(tbl->mask_cache); struct mask_array *ma = rcu_dereference(tbl->mask_array); struct table_instance *ti = rcu_dereference(tbl->ti); struct mask_cache_entry *entries, *ce; @@ -733,7 +801,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, *n_mask_hit = 0; *n_cache_hit = 0; - if (unlikely(!skb_hash)) { + if (unlikely(!skb_hash || mc->cache_size == 0)) { u32 mask_index = 0; u32 cache = 0; @@ -749,11 +817,11 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, ce = NULL; hash = skb_hash; - entries = this_cpu_ptr(tbl->mask_cache); + entries = this_cpu_ptr(mc->mask_cache); /* Find the cache entry 'ce' to operate on. */ for (seg = 0; seg < MC_HASH_SEGS; seg++) { - int index = hash & (MC_HASH_ENTRIES - 1); + int index = hash & (mc->cache_size - 1); struct mask_cache_entry *e; e = &entries[index]; @@ -867,6 +935,13 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return READ_ONCE(ma->count); } +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table) +{ + struct mask_cache *mc = rcu_dereference(table->mask_cache); + + return READ_ONCE(mc->cache_size); +} + static struct table_instance *table_instance_expand(struct table_instance *ti, bool ufid) { @@ -1095,8 +1170,8 @@ void ovs_flow_masks_rebalance(struct flow_table *table) for (i = 0; i < masks_entries; i++) { int index = masks_and_count[i].index; - new->masks[new->count++] = - rcu_dereference_ovsl(ma->masks[index]); + if (ovsl_dereference(ma->masks[index])) + new->masks[new->count++] = ma->masks[index]; } rcu_assign_pointer(table->mask_array, new); diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 325e939371d8..74ce48fecba9 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -27,6 +27,12 @@ struct mask_cache_entry { u32 mask_index; }; +struct mask_cache { + struct rcu_head rcu; + u32 cache_size; /* Must be ^2 value. */ + struct mask_cache_entry __percpu *mask_cache; +}; + struct mask_count { int index; u64 counter; @@ -53,7 +59,7 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; struct table_instance __rcu *ufid_ti; - struct mask_cache_entry __percpu *mask_cache; + struct mask_cache __rcu *mask_cache; struct mask_array __rcu *mask_array; unsigned long last_rehash; unsigned int count; @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, const struct sw_flow_mask *mask); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); int ovs_flow_tbl_num_masks(const struct flow_table *table); +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table); +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size); struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, u32 *bucket, u32 *idx); struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, -- cgit From d208a42a62e70b9c86b46b7bad126eb862891314 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 31 Jul 2020 15:32:06 +0200 Subject: ipv6/addrconf: call addrconf_ifdown with consistent values Second parameter of addrconf_ifdown "how" is used as a boolean internally. It does not make sense to call it with something different of 0 or 1. This value is set to 2 in all git history. Signed-off-by: Florent Fourcot Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 840bfdb3d7bd..861265fa9d6d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7189,7 +7189,7 @@ void addrconf_cleanup(void) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(init_net.loopback_dev, 2); + addrconf_ifdown(init_net.loopback_dev, 1); /* * Check hash table. -- cgit From ae79dbf60905a09a5cca63c02300b2a0f1befbad Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 31 Jul 2020 15:32:07 +0200 Subject: ipv6/addrconf: use a boolean to choose between UNREGISTER/DOWN "how" was used as a boolean. Change the type to bool, and improve variable name Signed-off-by: Florent Fourcot Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 861265fa9d6d..0acf6a9796ca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -163,7 +163,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); static void addrconf_type_change(struct net_device *dev, unsigned long event); -static int addrconf_ifdown(struct net_device *dev, int how); +static int addrconf_ifdown(struct net_device *dev, bool unregister); static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, @@ -3630,7 +3630,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, * an L3 master device (e.g., VRF) */ if (info->upper_dev && netif_is_l3_master(info->upper_dev)) - addrconf_ifdown(dev, 0); + addrconf_ifdown(dev, false); } return NOTIFY_OK; @@ -3663,9 +3663,9 @@ static bool addr_is_local(const struct in6_addr *addr) (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static int addrconf_ifdown(struct net_device *dev, int how) +static int addrconf_ifdown(struct net_device *dev, bool unregister) { - unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN; + unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN; struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; @@ -3684,7 +3684,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) * Step 1: remove reference to ipv6 device from parent device. * Do not dev_put! */ - if (how) { + if (unregister) { idev->dead = 1; /* protected by rtnl_lock */ @@ -3698,7 +3698,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ - if (!how && !idev->cnf.disable_ipv6) { + if (!unregister && !idev->cnf.disable_ipv6) { /* aggregate the system setting and interface setting */ int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; @@ -3736,7 +3736,7 @@ restart: addrconf_del_rs_timer(idev); /* Step 2: clear flags for stateless addrconf */ - if (!how) + if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); /* Step 3: clear tempaddr list */ @@ -3806,7 +3806,7 @@ restart: write_unlock_bh(&idev->lock); /* Step 5: Discard anycast and multicast list */ - if (how) { + if (unregister) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); } else { @@ -3816,7 +3816,7 @@ restart: idev->tstamp = jiffies; /* Last: Shot the device (if unregistered) */ - if (how) { + if (unregister) { addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); neigh_ifdown(&nd_tbl, dev); @@ -4038,7 +4038,7 @@ static void addrconf_dad_work(struct work_struct *w) in6_ifa_hold(ifp); addrconf_dad_stop(ifp, 1); if (disable_ipv6) - addrconf_ifdown(idev->dev, 0); + addrconf_ifdown(idev->dev, false); goto out; } @@ -7187,9 +7187,9 @@ void addrconf_cleanup(void) for_each_netdev(&init_net, dev) { if (__in6_dev_get(dev) == NULL) continue; - addrconf_ifdown(dev, 1); + addrconf_ifdown(dev, true); } - addrconf_ifdown(init_net.loopback_dev, 1); + addrconf_ifdown(init_net.loopback_dev, true); /* * Check hash table. -- cgit From fd65e5a95d08389444e8591a20538b3edece0e15 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 31 Jul 2020 19:26:16 +0300 Subject: net: bridge: clear bridge's private skb space on xmit We need to clear all of the bridge private skb variables as they can be stale due to the packet being recirculated through the stack and then transmitted through the bridge device. Similar memset is already done on bridge's input. We've seen cases where proxyarp_replied was 1 on routed multicast packets transmitted through the bridge to ports with neigh suppress which were getting dropped. Same thing can in theory happen with the port isolation bit as well. Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8c7b78f8bc23..9a2fb4aa1a10 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest; u16 vid = 0; + memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); + rcu_read_lock(); nf_ops = rcu_dereference(nf_br_ops); if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { -- cgit From 622e32b7d4a6492cf5c1f759ef833f817418f7b3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 31 Jul 2020 20:12:05 +0200 Subject: net: gre: recompute gre csum for sctp over gre tunnels The GRE tunnel can be used to transport traffic that does not rely on a Internet checksum (e.g. SCTP). The issue can be triggered creating a GRE or GRETAP tunnel and transmitting SCTP traffic ontop of it where CRC offload has been disabled. In order to fix the issue we need to recompute the GRE csum in gre_gso_segment() not relying on the inner checksum. The issue is still present when we have the CRC offload enabled. In this case we need to disable the CRC offload if we require GRE checksum since otherwise skb_checksum() will report a wrong value. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 2e6d1b7a7bc9..e0a246575887 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + bool need_csum, need_recompute_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, gso_partial; if (!skb->encapsulation) goto out; @@ -41,6 +41,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->protocol = skb->inner_protocol; need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); + need_recompute_csum = skb->csum_not_inet; skb->encap_hdr_csum = need_csum; features &= skb->dev->hw_enc_features; @@ -98,7 +99,15 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } *(pcsum + 1) = 0; - *pcsum = gso_make_checksum(skb, 0); + if (need_recompute_csum && !skb_is_gso(skb)) { + __wsum csum; + + csum = skb_checksum(skb, gre_offset, + skb->len - gre_offset, 0); + *pcsum = csum_fold(csum); + } else { + *pcsum = gso_make_checksum(skb, 0); + } } while ((skb = skb->next)); out: return segs; -- cgit From c15fc199b3757496325c4855662fad89bd1efdad Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 1 Aug 2020 17:30:23 +0800 Subject: net: Use __skb_pagelen() directly in skb_cow_data() In fact, skb_pagelen() - skb_headlen() is equal to __skb_pagelen(), use it directly to avoid unnecessary skb_headlen() call. Also fix the CHECK note of checkpatch.pl: Comparison to NULL could be written "!__pskb_pull_tail" Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e2edfbe0e19..97ad6523e490 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4413,7 +4413,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) * at the moment even if they are anonymous). */ if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) && - __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL) + !__pskb_pull_tail(skb, __skb_pagelen(skb))) return -ENOMEM; /* Easy case. Most of packets will go this way. */ -- cgit From 2f631133c40cd8e311ae393518c3e651e476ab66 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 1 Aug 2020 17:36:05 +0800 Subject: net: Pass NULL to skb_network_protocol() when we don't care about vlan depth When we don't care about vlan depth, we could pass NULL instead of the address of a unused local variable to skb_network_protocol() as a param. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97ad6523e490..2828f6d5ba89 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3758,7 +3758,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; - int dummy; if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { @@ -3780,7 +3779,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, } __skb_push(head_skb, doffset); - proto = skb_network_protocol(head_skb, &dummy); + proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL); -- cgit From d0f6ba2ef2c1c95069509e71402e7d6d43452512 Mon Sep 17 00:00:00 2001 From: Vincent Duvert Date: Sun, 2 Aug 2020 07:06:51 +0200 Subject: appletalk: Fix atalk_proc_init() return path Add a missing return statement to atalk_proc_init so it doesn't return -ENOMEM when successful. This allows the appletalk module to load properly. Fixes: e2bcd8b0ce6e ("appletalk: use remove_proc_subtree to simplify procfs code") Link: https://www.downtowndougbrown.com/2020/08/hacking-up-a-fix-for-the-broken-appletalk-kernel-module-in-linux-5-1-and-newer/ Reported-by: Christopher KOBAYASHI Reported-by: Doug Brown Signed-off-by: Vincent Duvert [lukas: add missing tags] Signed-off-by: Lukas Wunner Cc: stable@vger.kernel.org # v5.1+ Cc: Yue Haibing Signed-off-by: David S. Miller --- net/appletalk/atalk_proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 550c6ca007cc..9c1241292d1d 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -229,6 +229,8 @@ int __init atalk_proc_init(void) sizeof(struct aarp_iter_state), NULL)) goto out; + return 0; + out: remove_proc_subtree("atalk", init_net.proc_net); return -ENOMEM; -- cgit From fbc97de84ef88bc38874c41be151cf220c72a865 Mon Sep 17 00:00:00 2001 From: Huang Guobin Date: Sun, 2 Aug 2020 22:00:55 -0400 Subject: tipc: Use is_broadcast_ether_addr() instead of memcmp() Using is_broadcast_ether_addr() instead of directly use memcmp() to determine if the ethernet address is broadcast address. spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Huang Guobin Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/eth_media.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 8b0bb600602d..c68019697cfe 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -62,12 +62,10 @@ static int tipc_eth_raw2addr(struct tipc_bearer *b, struct tipc_media_addr *addr, char *msg) { - char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - memset(addr, 0, sizeof(*addr)); ether_addr_copy(addr->value, msg); addr->media_id = TIPC_MEDIA_TYPE_ETH; - addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); + addr->broadcast = is_broadcast_ether_addr(addr->value); return 0; } -- cgit From 730e700e2c19d87e578ff0e7d8cb1d4a02b036d2 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Thu, 30 Jul 2020 23:49:16 +0000 Subject: tcp: apply a floor of 1 for RTT samples from TCP timestamps For retransmitted packets, TCP needs to resort to using TCP timestamps for computing RTT samples. In the common case where the data and ACK fall in the same 1-millisecond interval, TCP senders with millisecond- granularity TCP timestamps compute a ca_rtt_us of 0. This ca_rtt_us of 0 propagates to rs->rtt_us. This value of 0 can cause performance problems for congestion control modules. For example, in BBR, the zero min_rtt sample can bring the min_rtt and BDP estimate down to 0, reduce snd_cwnd and result in a low throughput. It would be hard to mitigate this with filtering in the congestion control module, because the proper floor to apply would depend on the method of RTT sampling (using timestamp options or internally-saved transmission timestamps). This fix applies a floor of 1 for the RTT sample delta from TCP timestamps, so that seq_rtt_us, ca_rtt_us, and rs->rtt_us will be at least 1 * (USEC_PER_SEC / TCP_TS_HZ). Note that the receiver RTT computation in tcp_rcv_rtt_measure() and min_rtt computation in tcp_update_rtt_min() both already apply a floor of 1 timestamp tick, so this commit makes the code more consistent in avoiding this edge case of a value of 0. Signed-off-by: Jianfeng Wang Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Kevin Yang Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 739da25b0c23..184ea556f50e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2950,6 +2950,8 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + if (!delta) + delta = 1; seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); ca_rtt_us = seq_rtt_us; } -- cgit From 88fab21c691bb1ff164e540735237a385e3afeaf Mon Sep 17 00:00:00 2001 From: Ioana-Ruxandra Stăncioi Date: Mon, 3 Aug 2020 07:33:33 +0000 Subject: seg6_iptunnel: Refactor seg6_lwt_headroom out of uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the function seg6_lwt_headroom out of the seg6_iptunnel.h uapi header, because it is only used in seg6_iptunnel.c. Moreover, it is only used in the kernel code, as indicated by the "#ifdef __KERNEL__". Suggested-by: David Miller Signed-off-by: Ioana-Ruxandra Stăncioi Signed-off-by: David S. Miller --- net/ipv6/seg6_iptunnel.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index e0e9f48ab14f..897fa59c47de 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -27,6 +27,23 @@ #include #endif +static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int head = 0; + + switch (tuninfo->mode) { + case SEG6_IPTUN_MODE_INLINE: + break; + case SEG6_IPTUN_MODE_ENCAP: + head = sizeof(struct ipv6hdr); + break; + case SEG6_IPTUN_MODE_L2ENCAP: + return 0; + } + + return ((tuninfo->srh->hdrlen + 1) << 3) + head; +} + struct seg6_lwt { struct dst_cache cache; struct seg6_iptunnel_encap tuninfo[]; -- cgit From 190f8b060ee38fcea885e08b2fe0e3fdd428a618 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 3 Aug 2020 21:00:44 +0800 Subject: mptcp: use mptcp_for_each_subflow in mptcp_stream_accept Use mptcp_for_each_subflow in mptcp_stream_accept instead of open-coding. Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d3fe7296e1c9..400824eabf73 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2249,7 +2249,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, * This is needed so NOSPACE flag can be set from tcp stack. */ __mptcp_flush_join_list(msk); - list_for_each_entry(subflow, &msk->conn_list, node) { + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!ssk->sk_socket) -- cgit From 80fbbb1672e7815a15d7329638537961d65c453d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 3 Aug 2020 21:19:48 +0800 Subject: fib: Fix undef compile warning net/core/fib_rules.c:26:7: warning: "CONFIG_IP_MULTIPLE_TABLES" is not defined, evaluates to 0 [-Wundef] #elif CONFIG_IP_MULTIPLE_TABLES ^~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 8b66a6fd34f5 ("fib: fix another fib_rules_ops indirect call wrapper problem") Signed-off-by: YueHaibing Acked-By: Brian Vazquez Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a7a3f500a857..51678a528f85 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -23,7 +23,7 @@ #else #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__) #endif -#elif CONFIG_IP_MULTIPLE_TABLES +#elif defined(CONFIG_IP_MULTIPLE_TABLES) #define INDIRECT_CALL_MT(f, f2, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) #else #define INDIRECT_CALL_MT(f, f2, f1, ...) f(__VA_ARGS__) -- cgit From 08e335f6ad35a019f4cb1a74badc2f4bceb63bcf Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 3 Aug 2020 19:11:33 +0300 Subject: devlink: Add early_drop trap Add the packet trap that can report packets that were ECN marked due to RED AQM. Signed-off-by: Amit Cohen Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 5fdebb7289e9..bde4c29a30bc 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8801,6 +8801,7 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(PTP_GENERAL, CONTROL), DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL), DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL), + DEVLINK_TRAP(EARLY_DROP, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ -- cgit From c88e11e04716ab4ed51d5972ea04c7b70b6e9d8a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 3 Aug 2020 19:11:34 +0300 Subject: devlink: Pass extack when setting trap's action and group's parameters A later patch will refuse to set the action of certain traps in mlxsw and also to change the policer binding of certain groups. Pass extack so that failure could be communicated clearly to user space. Reviewed-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- net/core/devlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index bde4c29a30bc..e674f0f46dc2 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6423,7 +6423,7 @@ static int __devlink_trap_action_set(struct devlink *devlink, } err = devlink->ops->trap_action_set(devlink, trap_item->trap, - trap_action); + trap_action, extack); if (err) return err; @@ -6713,7 +6713,8 @@ static int devlink_trap_group_set(struct devlink *devlink, } policer = policer_item ? policer_item->policer : NULL; - err = devlink->ops->trap_group_set(devlink, group_item->group, policer); + err = devlink->ops->trap_group_set(devlink, group_item->group, policer, + extack); if (err) return err; @@ -9051,7 +9052,8 @@ static void devlink_trap_disable(struct devlink *devlink, if (WARN_ON_ONCE(!trap_item)) return; - devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP); + devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP, + NULL); trap_item->action = DEVLINK_TRAP_ACTION_DROP; } -- cgit From 8555c6bfd5fddb1cf363d3cd157d70a1bb27f718 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 3 Aug 2020 18:40:39 +0200 Subject: mptcp: fix bogus sendmsg() return code under pressure In case of memory pressure, mptcp_sendmsg() may call sk_stream_wait_memory() after succesfully xmitting some bytes. If the latter fails we currently return to the user-space the error code, ignoring the succeful xmit. Address the issue always checking for the xmitted bytes before mptcp_sendmsg() completes. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 400824eabf73..8c1d1a595701 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -984,7 +984,6 @@ wait_for_sndbuf: mptcp_set_timeout(sk, ssk); if (copied) { - ret = copied; tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal); @@ -997,7 +996,7 @@ wait_for_sndbuf: release_sock(ssk); out: release_sock(sk); - return ret; + return copied ? : ret; } static void mptcp_wait_data(struct sock *sk, long *timeo) -- cgit From df23bb18b44b9a1f2b54358201730e710a9df57f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 4 Aug 2020 07:53:42 +0200 Subject: ipv4: route: Ignore output interface in FIB lookup for PMTU route Currently, processes sending traffic to a local bridge with an encapsulation device as a port don't get ICMP errors if they exceed the PMTU of the encapsulated link. David Ahern suggested this as a hack, but it actually looks like the correct solution: when we update the PMTU for a given destination by means of updating or creating a route exception, the encapsulation might trigger this because of PMTU discovery happening either on the encapsulation device itself, or its lower layer. This happens on bridged encapsulations only. The output interface shouldn't matter, because we already have a valid destination. Drop the output interface restriction from the associated route lookup. For UDP tunnels, we will now have a route exception created for the encapsulation itself, with a MTU value reflecting its headroom, which allows a bridge forwarding IP packets originated locally to deliver errors back to the sending socket. The behaviour is now consistent with IPv6 and verified with selftests pmtu_ipv{4,6}_br_{geneve,vxlan}{4,6}_exception introduced later in this series. v2: - reset output interface only for bridge ports (David Ahern) - add and use netif_is_any_bridge_port() helper (David Ahern) Suggested-by: David Ahern Signed-off-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a01efa062f6b..8ca6bcab7b03 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1050,6 +1050,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); + + /* Don't make lookup fail for bridged encapsulations */ + if (skb && netif_is_any_bridge_port(skb->dev)) + fl4.flowi4_oif = 0; + __ip_rt_update_pmtu(rt, &fl4, mtu); } -- cgit From 4cb47a8644cc9eb8ec81190a50e79e6530d0297f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 4 Aug 2020 07:53:43 +0200 Subject: tunnels: PMTU discovery support for directly bridged IP packets It's currently possible to bridge Ethernet tunnels carrying IP packets directly to external interfaces without assigning them addresses and routes on the bridged network itself: this is the case for UDP tunnels bridged with a standard bridge or by Open vSwitch. PMTU discovery is currently broken with those configurations, because the encapsulation effectively decreases the MTU of the link, and while we are able to account for this using PMTU discovery on the lower layer, we don't have a way to relay ICMP or ICMPv6 messages needed by the sender, because we don't have valid routes to it. On the other hand, as a tunnel endpoint, we can't fragment packets as a general approach: this is for instance clearly forbidden for VXLAN by RFC 7348, section 4.3: VTEPs MUST NOT fragment VXLAN packets. Intermediate routers may fragment encapsulated VXLAN packets due to the larger frame size. The destination VTEP MAY silently discard such VXLAN fragments. The same paragraph recommends that the MTU over the physical network accomodates for encapsulations, but this isn't a practical option for complex topologies, especially for typical Open vSwitch use cases. Further, it states that: Other techniques like Path MTU discovery (see [RFC1191] and [RFC1981]) MAY be used to address this requirement as well. Now, PMTU discovery already works for routed interfaces, we get route exceptions created by the encapsulation device as they receive ICMP Fragmentation Needed and ICMPv6 Packet Too Big messages, and we already rebuild those messages with the appropriate MTU and route them back to the sender. Add the missing bits for bridged cases: - checks in skb_tunnel_check_pmtu() to understand if it's appropriate to trigger a reply according to RFC 1122 section 3.2.2 for ICMP and RFC 4443 section 2.4 for ICMPv6. This function is already called by UDP tunnels - a new function generating those ICMP or ICMPv6 replies. We can't reuse icmp_send() and icmp6_send() as we don't see the sender as a valid destination. This doesn't need to be generic, as we don't cover any other type of ICMP errors given that we only provide an encapsulation function to the sender While at it, make the MTU check in skb_tunnel_check_pmtu() accurate: we might receive GSO buffers here, and the passed headroom already includes the inner MAC length, so we don't have to account for it a second time (that would imply three MAC headers on the wire, but there are just two). This issue became visible while bridging IPv6 packets with 4500 bytes of payload over GENEVE using IPv4 with a PMTU of 4000. Given the 50 bytes of encapsulation headroom, we would advertise MTU as 3950, and we would reject fragmented IPv6 datagrams of 3958 bytes size on the wire. We're exclusively dealing with network MTU here, though, so we could get Ethernet frames up to 3964 octets in that case. v2: - moved skb_tunnel_check_pmtu() to ip_tunnel_core.c (David Ahern) - split IPv4/IPv6 functions (David Ahern) Signed-off-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 244 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index f8b419e2475c..9ddee2a0c66d 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -184,6 +184,250 @@ int iptunnel_handle_offloads(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); +/** + * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD + * @skb: Original packet with L2 header + * @mtu: MTU value for ICMP error + * + * Return: length on success, negative error code if message couldn't be built. + */ +static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) +{ + const struct iphdr *iph = ip_hdr(skb); + struct icmphdr *icmph; + struct iphdr *niph; + struct ethhdr eh; + int len, err; + + if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) + return -EINVAL; + + skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); + pskb_pull(skb, ETH_HLEN); + skb_reset_network_header(skb); + + err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); + if (err) + return err; + + len = skb->len + sizeof(*icmph); + err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); + if (err) + return err; + + icmph = skb_push(skb, sizeof(*icmph)); + *icmph = (struct icmphdr) { + .type = ICMP_DEST_UNREACH, + .code = ICMP_FRAG_NEEDED, + .checksum = 0, + .un.frag.__unused = 0, + .un.frag.mtu = ntohs(mtu), + }; + icmph->checksum = ip_compute_csum(icmph, len); + skb_reset_transport_header(skb); + + niph = skb_push(skb, sizeof(*niph)); + *niph = (struct iphdr) { + .ihl = sizeof(*niph) / 4u, + .version = 4, + .tos = 0, + .tot_len = htons(len + sizeof(*niph)), + .id = 0, + .frag_off = htons(IP_DF), + .ttl = iph->ttl, + .protocol = IPPROTO_ICMP, + .saddr = iph->daddr, + .daddr = iph->saddr, + }; + ip_send_check(niph); + skb_reset_network_header(skb); + + skb->ip_summed = CHECKSUM_NONE; + + eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0); + skb_reset_mac_header(skb); + + return skb->len; +} + +/** + * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed + * @skb: Buffer being sent by encapsulation, L2 headers expected + * @mtu: Network MTU for path + * + * Return: 0 for no ICMP reply, length if built, negative value on error. + */ +static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) +{ + const struct icmphdr *icmph = icmp_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); + + if (mtu <= 576 || iph->frag_off != htons(IP_DF)) + return 0; + + if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || + ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) || + ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) + return 0; + + if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) + return 0; + + return iptunnel_pmtud_build_icmp(skb, mtu); +} + +#if IS_ENABLED(CONFIG_IPV6) +/** + * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD + * @skb: Original packet with L2 header + * @mtu: MTU value for ICMPv6 error + * + * Return: length on success, negative error code if message couldn't be built. + */ +static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct icmp6hdr *icmp6h; + struct ipv6hdr *nip6h; + struct ethhdr eh; + int len, err; + __wsum csum; + + if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) + return -EINVAL; + + skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); + pskb_pull(skb, ETH_HLEN); + skb_reset_network_header(skb); + + err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); + if (err) + return err; + + len = skb->len + sizeof(*icmp6h); + err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN); + if (err) + return err; + + icmp6h = skb_push(skb, sizeof(*icmp6h)); + *icmp6h = (struct icmp6hdr) { + .icmp6_type = ICMPV6_PKT_TOOBIG, + .icmp6_code = 0, + .icmp6_cksum = 0, + .icmp6_mtu = htonl(mtu), + }; + skb_reset_transport_header(skb); + + nip6h = skb_push(skb, sizeof(*nip6h)); + *nip6h = (struct ipv6hdr) { + .priority = 0, + .version = 6, + .flow_lbl = { 0 }, + .payload_len = htons(len), + .nexthdr = IPPROTO_ICMPV6, + .hop_limit = ip6h->hop_limit, + .saddr = ip6h->daddr, + .daddr = ip6h->saddr, + }; + skb_reset_network_header(skb); + + csum = csum_partial(icmp6h, len, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, + IPPROTO_ICMPV6, csum); + + skb->ip_summed = CHECKSUM_NONE; + + eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0); + skb_reset_mac_header(skb); + + return skb->len; +} + +/** + * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed + * @skb: Buffer being sent by encapsulation, L2 headers expected + * @mtu: Network MTU for path + * + * Return: 0 for no ICMPv6 reply, length if built, negative value on error. + */ +static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int stype = ipv6_addr_type(&ip6h->saddr); + u8 proto = ip6h->nexthdr; + __be16 frag_off; + int offset; + + if (mtu <= IPV6_MIN_MTU) + return 0; + + if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || + stype == IPV6_ADDR_LOOPBACK) + return 0; + + offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, + &frag_off); + if (offset < 0 || (frag_off & htons(~0x7))) + return 0; + + if (proto == IPPROTO_ICMPV6) { + struct icmp6hdr *icmp6h; + + if (!pskb_may_pull(skb, skb_network_header(skb) + + offset + 1 - skb->data)) + return 0; + + icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset); + if (icmpv6_is_err(icmp6h->icmp6_type) || + icmp6h->icmp6_type == NDISC_REDIRECT) + return 0; + } + + return iptunnel_pmtud_build_icmpv6(skb, mtu); +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ + +/** + * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed + * @skb: Buffer being sent by encapsulation, L2 headers expected + * @encap_dst: Destination for tunnel encapsulation (outer IP) + * @headroom: Encapsulation header size, bytes + * @reply: Build matching ICMP or ICMPv6 message as a result + * + * L2 tunnel implementations that can carry IP and can be directly bridged + * (currently UDP tunnels) can't always rely on IP forwarding paths to handle + * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built + * based on payload and sent back by the encapsulation itself. + * + * For routable interfaces, we just need to update the PMTU for the destination. + * + * Return: 0 if ICMP error not needed, length if built, negative value on error + */ +int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, + int headroom, bool reply) +{ + u32 mtu = dst_mtu(encap_dst) - headroom; + + if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || + (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + return 0; + + skb_dst_update_pmtu_no_confirm(skb, mtu); + + if (!reply || skb->pkt_type == PACKET_HOST) + return 0; + + if (skb->protocol == htons(ETH_P_IP)) + return iptunnel_pmtud_check_icmp(skb, mtu); + +#if IS_ENABLED(CONFIG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) + return iptunnel_pmtud_check_icmpv6(skb, mtu); +#endif + return 0; +} +EXPORT_SYMBOL(skb_tunnel_check_pmtu); + /* Often modified stats are per cpu, other are shared (netdev->stats) */ void ip_tunnel_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) -- cgit From 5845589ed652da9b65833044bd2d0bcb1d676ed0 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 5 Aug 2020 15:19:11 +0800 Subject: net: openvswitch: silence suspicious RCU usage warning ovs_flow_tbl_destroy always is called from RCU callback or error path. It is no need to check if rcu_read_lock or lockdep_ovsl_is_held was held. ovs_dp_cmd_fill_info always is called with ovs_mutex, So use the rcu_dereference_ovsl instead of rcu_dereference in ovs_flow_tbl_masks_cache_size. Fixes: 9bf24f594c6a ("net: openvswitch: make masks cache size configurable") Cc: Eelco Chaudron Reported-by: syzbot+c0eb9e7cdde04e4eb4be@syzkaller.appspotmail.com Reported-by: syzbot+f612c02823acb02ff9bc@syzkaller.appspotmail.com Signed-off-by: Tonghao Zhang Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 6527d84c3ea6..8c12675cbb67 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -518,8 +518,8 @@ void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - struct mask_cache *mc = rcu_dereference(table->mask_cache); - struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); + struct mask_cache *mc = rcu_dereference_raw(table->mask_cache); + struct mask_array *ma = rcu_dereference_raw(table->mask_array); call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); @@ -937,7 +937,7 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table) { - struct mask_cache *mc = rcu_dereference(table->mask_cache); + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); return READ_ONCE(mc->cache_size); } -- cgit From 81f6cb31222d286dab65579d61f96664eeb99e0b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Aug 2020 23:34:46 +0800 Subject: ipv6: add ipv6_dev_find() This is to add an ip_dev_find like function for ipv6, used to find the dev by saddr. It will be used by TIPC protocol. So also export it. Signed-off-by: Xin Long Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0acf6a9796ca..8e761b8c47c6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1983,6 +1983,45 @@ int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) } EXPORT_SYMBOL(ipv6_chk_prefix); +/** + * ipv6_dev_find - find the first device with a given source address. + * @net: the net namespace + * @addr: the source address + * + * The caller should be protected by RCU, or RTNL. + */ +struct net_device *ipv6_dev_find(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_addr_hash(net, addr); + struct inet6_ifaddr *ifp, *result = NULL; + struct net_device *dev = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { + if (net_eq(dev_net(ifp->idev->dev), net) && + ipv6_addr_equal(&ifp->addr, addr)) { + result = ifp; + break; + } + } + + if (!result) { + struct rt6_info *rt; + + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); + if (rt) { + dev = rt->dst.dev; + ip6_rt_put(rt); + } + } else { + dev = result->idev->dev; + } + rcu_read_unlock(); + + return dev; +} +EXPORT_SYMBOL(ipv6_dev_find); + struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { -- cgit From 5a6f6f579178dbeb33002d93b4f646c31348fac9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Aug 2020 23:34:47 +0800 Subject: tipc: set ub->ifindex for local ipv6 address Without ub->ifindex set for ipv6 address in tipc_udp_enable(), ipv6_sock_mc_join() may make the wrong dev join the multicast address in enable_mcast(). This causes that tipc links would never be created. So fix it by getting the right netdev and setting ub->ifindex, as it does for ipv4 address. Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/udp_media.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d91b7c543e39..53f0de0676b7 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -738,6 +738,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, b->mtu = b->media->mtu; #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { + struct net_device *dev; + + dev = ipv6_dev_find(net, &local.ipv6); + if (!dev) { + err = -ENODEV; + goto err; + } udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = true; udp_conf.use_udp6_rx_checksums = true; @@ -745,6 +752,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, udp_conf.local_ip6 = in6addr_any; else udp_conf.local_ip6 = local.ipv6; + ub->ifindex = dev->ifindex; b->mtu = 1280; #endif } else { -- cgit From adf7341064982de923a1f8a11bcdec48be6b3004 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 4 Aug 2020 18:31:06 +0200 Subject: mptcp: be careful on subflow creation Nicolas reported the following oops: [ 1521.392541] BUG: kernel NULL pointer dereference, address: 00000000000000c0 [ 1521.394189] #PF: supervisor read access in kernel mode [ 1521.395376] #PF: error_code(0x0000) - not-present page [ 1521.396607] PGD 0 P4D 0 [ 1521.397156] Oops: 0000 [#1] SMP PTI [ 1521.398020] CPU: 0 PID: 22986 Comm: kworker/0:2 Not tainted 5.8.0-rc4+ #109 [ 1521.399618] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 1521.401728] Workqueue: events mptcp_worker [ 1521.402651] RIP: 0010:mptcp_subflow_create_socket+0xf1/0x1c0 [ 1521.403954] Code: 24 08 89 44 24 04 48 8b 7a 18 e8 2a 48 d4 ff 8b 44 24 04 85 c0 75 7a 48 8b 8b 78 02 00 00 48 8b 54 24 08 48 8d bb 80 00 00 00 <48> 8b 89 c0 00 00 00 48 89 8a c0 00 00 00 48 8b 8b 78 02 00 00 8b [ 1521.408201] RSP: 0000:ffffabc4002d3c60 EFLAGS: 00010246 [ 1521.409433] RAX: 0000000000000000 RBX: ffffa0b9ad8c9a00 RCX: 0000000000000000 [ 1521.411096] RDX: ffffa0b9ae78a300 RSI: 00000000fffffe01 RDI: ffffa0b9ad8c9a80 [ 1521.412734] RBP: ffffa0b9adff2e80 R08: ffffa0b9af02d640 R09: ffffa0b9ad923a00 [ 1521.414333] R10: ffffabc4007139f8 R11: fefefefefefefeff R12: ffffabc4002d3cb0 [ 1521.415918] R13: ffffa0b9ad91fa58 R14: ffffa0b9ad8c9f9c R15: 0000000000000000 [ 1521.417592] FS: 0000000000000000(0000) GS:ffffa0b9af000000(0000) knlGS:0000000000000000 [ 1521.419490] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1521.420839] CR2: 00000000000000c0 CR3: 000000002951e006 CR4: 0000000000160ef0 [ 1521.422511] Call Trace: [ 1521.423103] __mptcp_subflow_connect+0x94/0x1f0 [ 1521.425376] mptcp_pm_create_subflow_or_signal_addr+0x200/0x2a0 [ 1521.426736] mptcp_worker+0x31b/0x390 [ 1521.431324] process_one_work+0x1fc/0x3f0 [ 1521.432268] worker_thread+0x2d/0x3b0 [ 1521.434197] kthread+0x117/0x130 [ 1521.435783] ret_from_fork+0x22/0x30 on some unconventional configuration. The MPTCP protocol is trying to create a subflow for an unaccepted server socket. That is allowed by the RFC, even if subflow creation will likely fail. Unaccepted sockets have still a NULL sk_socket field, avoid the issue by failing earlier. Reported-and-tested-by: Nicolas Rybowski Fixes: 7d14b0d2b9b3 ("mptcp: set correct vfs info for subflows") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a4cc4591bd4e..96f4f2fe50ad 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1137,6 +1137,12 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) struct socket *sf; int err; + /* un-accepted server sockets can reach here - on bad configuration + * bail early to avoid greater trouble later + */ + if (unlikely(!sk->sk_socket)) + return -EINVAL; + err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP, &sf); if (err) -- cgit From 8ed54f167abda44da48498876953f5b7843378df Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 5 Aug 2020 15:39:31 +0200 Subject: ip_tunnel_core: Fix build for archs without _HAVE_ARCH_IPV6_CSUM On architectures defining _HAVE_ARCH_IPV6_CSUM, we get csum_ipv6_magic() defined by means of arch checksum.h headers. On other architectures, we actually need to include net/ip6_checksum.h to be able to use it. Without this include, building with defconfig breaks at least for s390. Reported-by: Stephen Rothwell Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 9ddee2a0c66d..75c6013ff9a4 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit