summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/fragmentation.c11
-rw-r--r--net/bluetooth/hci_conn.c32
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_event.c80
-rw-r--r--net/bluetooth/hci_sync.c11
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br_input.c7
-rw-r--r--net/bridge/br_switchdev.c2
-rw-r--r--net/can/isotp.c25
-rw-r--r--net/ceph/osd_client.c317
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/secure_seq.c16
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/dccp/ipv4.c6
-rw-r--r--net/dccp/ipv6.c6
-rw-r--r--net/decnet/dn_dev.c4
-rw-r--r--net/decnet/dn_neigh.c3
-rw-r--r--net/decnet/dn_route.c4
-rw-r--r--net/dsa/port.c3
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ipv4/igmp.c9
-rw-r--r--net/ipv4/inet_hashtables.c42
-rw-r--r--net/ipv4/inet_timewait_sock.c58
-rw-r--r--net/ipv4/ip_gre.c12
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c0
-rw-r--r--net/ipv4/ping.c12
-rw-r--r--net/ipv4/route.c24
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/tcp_rate.c11
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ip6_gre.c16
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/netfilter.c10
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/key/af_key.c12
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/rx.c3
-rw-r--r--net/mctp/device.c2
-rw-r--r--net/mptcp/options.c36
-rw-r--r--net/mptcp/pm.c5
-rw-r--r--net/mptcp/protocol.h19
-rw-r--r--net/mptcp/subflow.c35
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c21
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c60
-rw-r--r--net/netfilter/nf_flow_table_ip.c19
-rw-r--r--net/netfilter/nf_tables_api.c11
-rw-r--r--net/netfilter/nft_flow_offload.c28
-rw-r--r--net/netfilter/nft_set_rbtree.c6
-rw-r--r--net/netfilter/nft_socket.c52
-rw-r--r--net/netlink/af_netlink.c1
-rw-r--r--net/nfc/core.c29
-rw-r--r--net/nfc/nci/data.c2
-rw-r--r--net/nfc/nci/hci.c4
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/rds/tcp.c18
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_connect.c5
-rw-r--r--net/rds/tcp_listen.c5
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/sched/act_pedit.c30
-rw-r--r--net/sctp/sm_sideeffect.c4
-rw-r--r--net/smc/af_smc.c137
-rw-r--r--net/smc/smc.h29
-rw-r--r--net/smc/smc_close.c5
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c1
-rw-r--r--net/sunrpc/clnt.c44
-rw-r--r--net/sunrpc/xprtsock.c35
-rw-r--r--net/tls/tls_device.c15
-rw-r--r--net/wireless/nl80211.c18
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/xdp/xsk.c15
-rw-r--r--net/xdp/xsk_buff_pool.c16
-rw-r--r--net/xfrm/xfrm_policy.c2
82 files changed, 987 insertions, 572 deletions
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0899a729a23f..c120c7c6d25f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 84312c836549..fe803bee419a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -670,7 +670,7 @@ static void le_conn_timeout(struct work_struct *work)
/* Disable LE Advertising */
le_disable_advertising(hdev);
hci_dev_lock(hdev);
- hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+ hci_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
hci_dev_unlock(hdev);
return;
}
@@ -873,7 +873,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
EXPORT_SYMBOL(hci_get_route);
/* This function requires the caller holds hdev->lock */
-void hci_le_conn_failed(struct hci_conn *conn, u8 status)
+static void hci_le_conn_failed(struct hci_conn *conn, u8 status)
{
struct hci_dev *hdev = conn->hdev;
struct hci_conn_params *params;
@@ -886,8 +886,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
params->conn = NULL;
}
- conn->state = BT_CLOSED;
-
/* If the status indicates successful cancellation of
* the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
@@ -899,10 +897,6 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
- hci_connect_cfm(conn, status);
-
- hci_conn_del(conn);
-
/* Since we may have temporarily stopped the background scanning in
* favor of connection establishment, we should restart it.
*/
@@ -914,6 +908,28 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
hci_enable_advertising(hdev);
}
+/* This function requires the caller holds hdev->lock */
+void hci_conn_failed(struct hci_conn *conn, u8 status)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
+
+ switch (conn->type) {
+ case LE_LINK:
+ hci_le_conn_failed(conn, status);
+ break;
+ case ACL_LINK:
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ break;
+ }
+
+ conn->state = BT_CLOSED;
+ hci_connect_cfm(conn, status);
+ hci_conn_del(conn);
+}
+
static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
{
struct hci_conn *conn = data;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b4782a6c1025..45c2dd2e1590 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2555,10 +2555,10 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -2567,7 +2567,7 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ snprintf(hdev->name, sizeof(hdev->name), "hci%d", id);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index abaabfae19cc..66451661283c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2834,7 +2834,7 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -2859,7 +2859,7 @@ static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
bt_dev_dbg(hdev, "status 0x%2.2x", status);
/* All connection failure handling is taken care of by the
- * hci_le_conn_failed function which is triggered by the HCI
+ * hci_conn_failed function which is triggered by the HCI
* request completion callbacks used for connecting.
*/
if (status)
@@ -3067,18 +3067,20 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Connection_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (ev->status)
+ goto unlock;
+
/* Connection may not exist if auto-connected. Check the bredr
* allowlist to see if this device is allowed to auto connect.
* If link is an ACL type, create a connection class
@@ -3122,8 +3124,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- if (!ev->status) {
+ if (!status) {
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ goto done;
+ }
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
@@ -3164,19 +3172,14 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp),
&cp);
}
- } else {
- conn->state = BT_CLOSED;
- if (conn->type == ACL_LINK)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, ev->status);
}
if (conn->type == ACL_LINK)
hci_sco_setup(conn, ev->status);
- if (ev->status) {
- hci_connect_cfm(conn, ev->status);
- hci_conn_del(conn);
+done:
+ if (status) {
+ hci_conn_failed(conn, status);
} else if (ev->link_type == SCO_LINK) {
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_CVSD:
@@ -3185,7 +3188,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
break;
}
- hci_connect_cfm(conn, ev->status);
+ hci_connect_cfm(conn, status);
}
unlock:
@@ -4676,6 +4679,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
{
struct hci_ev_sync_conn_complete *ev = data;
struct hci_conn *conn;
+ u8 status = ev->status;
switch (ev->link_type) {
case SCO_LINK:
@@ -4690,12 +4694,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
return;
}
- if (__le16_to_cpu(ev->handle) > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_Sync_Conn_Complete for invalid handle");
- return;
- }
-
- bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
+ bt_dev_dbg(hdev, "status 0x%2.2x", status);
hci_dev_lock(hdev);
@@ -4729,9 +4728,17 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
goto unlock;
}
- switch (ev->status) {
+ switch (status) {
case 0x00:
conn->handle = __le16_to_cpu(ev->handle);
+ if (conn->handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x",
+ conn->handle, HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ conn->state = BT_CLOSED;
+ break;
+ }
+
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4775,8 +4782,8 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, void *data,
}
}
- hci_connect_cfm(conn, ev->status);
- if (ev->status)
+ hci_connect_cfm(conn, status);
+ if (status)
hci_conn_del(conn);
unlock:
@@ -5527,11 +5534,6 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
struct smp_irk *irk;
u8 addr_type;
- if (handle > HCI_CONN_HANDLE_MAX) {
- bt_dev_err(hdev, "Ignoring HCI_LE_Connection_Complete for invalid handle");
- return;
- }
-
hci_dev_lock(hdev);
/* All controllers implicitly stop advertising in the event of a
@@ -5541,6 +5543,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn = hci_lookup_le_connect(hdev);
if (!conn) {
+ /* In case of error status and there is no connection pending
+ * just unlock as there is nothing to cleanup.
+ */
+ if (status)
+ goto unlock;
+
conn = hci_conn_add(hdev, LE_LINK, bdaddr, role);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
@@ -5603,8 +5611,14 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = ev_bdaddr_type(hdev, conn->dst_type, NULL);
+ if (handle > HCI_CONN_HANDLE_MAX) {
+ bt_dev_err(hdev, "Invalid handle: 0x%4.4x > 0x%4.4x", handle,
+ HCI_CONN_HANDLE_MAX);
+ status = HCI_ERROR_INVALID_PARAMETERS;
+ }
+
if (status) {
- hci_le_conn_failed(conn, status);
+ hci_conn_failed(conn, status);
goto unlock;
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8f4c5698913d..13600bf120b0 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -4408,12 +4408,21 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
static int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
u8 reason)
{
+ int err;
+
switch (conn->state) {
case BT_CONNECTED:
case BT_CONFIG:
return hci_disconnect_sync(hdev, conn, reason);
case BT_CONNECT:
- return hci_connect_cancel_sync(hdev, conn);
+ err = hci_connect_cancel_sync(hdev, conn);
+ /* Cleanup hci_conn object if it cannot be cancelled as it
+ * likelly means the controller and host stack are out of sync.
+ */
+ if (err)
+ hci_conn_failed(conn, err);
+
+ return err;
case BT_CONNECT2:
return hci_reject_conn_sync(hdev, conn, reason);
default:
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index e7b9c2636d10..af709c182674 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -108,6 +108,7 @@ struct xdp_test_data {
struct page_pool *pp;
struct xdp_frame **frames;
struct sk_buff **skbs;
+ struct xdp_mem_info mem;
u32 batch_size;
u32 frame_cnt;
};
@@ -147,7 +148,6 @@ static void xdp_test_run_init_page(struct page *page, void *arg)
static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx)
{
- struct xdp_mem_info mem = {};
struct page_pool *pp;
int err = -ENOMEM;
struct page_pool_params pp_params = {
@@ -174,7 +174,7 @@ static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_c
}
/* will copy 'mem.id' into pp->xdp_mem_id */
- err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pp);
+ err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp);
if (err)
goto err_mmodel;
@@ -202,6 +202,7 @@ err_skbs:
static void xdp_test_run_teardown(struct xdp_test_data *xdp)
{
+ xdp_unreg_mem_model(&xdp->mem);
page_pool_destroy(xdp->pp);
kfree(xdp->frames);
kfree(xdp->skbs);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 196417859c4a..68b3e850bcb9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -39,6 +39,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
dev_sw_netstats_rx_add(brdev, skb->len);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge it doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc mode when someone
* may be running packet capture.
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 8cc44c367231..18affda2b522 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -353,6 +353,8 @@ static int br_switchdev_vlan_attr_replay(struct net_device *br_dev,
attr.orig_dev = br_dev;
vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
list_for_each_entry(v, &vg->vlan_list, vlist) {
if (v->msti) {
diff --git a/net/can/isotp.c b/net/can/isotp.c
index ff5d7870294e..1e7c6a460ef9 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1189,6 +1189,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
lock_sock(sk);
+ if (so->bound) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* do not register frame reception for functional addressing */
if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
do_rx_reg = 0;
@@ -1199,10 +1204,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out;
}
- if (so->bound && addr->can_ifindex == so->ifindex &&
- rx_id == so->rxid && tx_id == so->txid)
- goto out;
-
dev = dev_get_by_index(net, addr->can_ifindex);
if (!dev) {
err = -ENODEV;
@@ -1237,22 +1238,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
dev_put(dev);
- if (so->bound && do_rx_reg) {
- /* unregister old filter */
- if (so->ifindex) {
- dev = dev_get_by_index(net, so->ifindex);
- if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
- can_rx_unregister(net, dev, so->txid,
- SINGLE_MASK(so->txid),
- isotp_rcv_echo, sk);
- dev_put(dev);
- }
- }
- }
-
/* switch to new settings */
so->ifindex = ifindex;
so->rxid = rx_id;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1c5815530e0d..9d82bb42e958 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -2385,7 +2364,11 @@ again:
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
} else {
- pr_warn_ratelimited("FULL or reached pool quota\n");
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+ pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+ else
+ pr_warn_ratelimited("pool %lld is full or reached quota\n",
+ req->r_t.base_oloc.pool);
req->r_t.paused = true;
maybe_request_map(osdc);
}
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- request_reinit(req);
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
- target_copy(&req->r_t, &lreq->t);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ target_copy(&req->r_t, &lreq->t);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4566,8 +4591,13 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request. If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
@@ -4653,43 +4683,6 @@ again:
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4719,18 +4712,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4768,8 +4749,8 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
@@ -4855,35 +4836,6 @@ out_put_req:
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4902,7 +4854,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4915,41 +4866,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
if (!lreq)
return -ENOMEM;
- lreq->preply_pages = preply_pages;
- lreq->preply_len = preply_len;
-
- ceph_oid_copy(&lreq->t.base_oid, oid);
- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_READ;
-
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
ret = -ENOMEM;
goto out_put_lreq;
}
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
goto out_put_lreq;
+ }
/* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
goto out_put_lreq;
}
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
+ lreq->preply_pages = preply_pages;
+ lreq->preply_len = preply_len;
+
+ ceph_oid_copy(&lreq->t.base_oid, oid);
+ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+ lreq->t.flags = CEPH_OSD_FLAG_READ;
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8c6c08446556..2771fd22dc6a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -681,11 +681,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
- .daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;
@@ -10304,7 +10304,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
{
struct net_device_core_stats __percpu *p;
@@ -10315,11 +10315,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
free_percpu(p);
/* This READ_ONCE() pairs with the cmpxchg() above */
- p = READ_ONCE(dev->core_stats);
- if (!p)
- return NULL;
-
- return this_cpu_ptr(p);
+ return READ_ONCE(dev->core_stats);
}
EXPORT_SYMBOL(netdev_core_stats_alloc);
@@ -10356,9 +10352,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
for_each_possible_cpu(i) {
core_stats = per_cpu_ptr(p, i);
- storage->rx_dropped += local_read(&core_stats->rx_dropped);
- storage->tx_dropped += local_read(&core_stats->tx_dropped);
- storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
}
}
return storage;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9b8443774449..5f85e01d4093 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_aligned_key_t net_secret;
static siphash_aligned_key_t ts_secret;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 30b523fa4ad2..c90c74de90d5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3897,7 +3897,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb, *tmp;
- int err;
+ int len_diff, err;
skb_push(skb, -skb_network_offset(skb) + offset);
@@ -3937,9 +3937,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
__copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ nskb->transport_header += len_diff;
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
offset + tnl_hlen);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ae662567a6cb..0ea29270d7e5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v4_ctl_sk);
}
+static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&dccp_hashinfo, AF_INET);
+}
+
static struct pernet_operations dccp_v4_ops = {
.init = dccp_v4_init_net,
.exit = dccp_v4_exit_net,
+ .exit_batch = dccp_v4_exit_batch,
.id = &dccp_v4_pernet_id,
.size = sizeof(struct dccp_v4_pernet),
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index eab3bd1ee9a0..fa663518fa0e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
inet_ctl_sock_destroy(pn->v6_ctl_sk);
}
+static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&dccp_hashinfo, AF_INET6);
+}
+
static struct pernet_operations dccp_v6_ops = {
.init = dccp_v6_init_net,
.exit = dccp_v6_exit_net,
+ .exit_batch = dccp_v6_exit_batch,
.id = &dccp_v6_pernet_id,
.size = sizeof(struct dccp_v6_pernet),
};
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 0ee7d4c0c955..a09ba642b5e7 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -854,7 +854,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
if (dn_db->router) {
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+ struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
dn_dn2eth(msg->neighbor, dn->addr);
}
@@ -902,7 +902,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
{
int n;
struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
+ struct dn_neigh *dn = container_of(dn_db->router, struct dn_neigh, n);
struct sk_buff *skb;
size_t size;
unsigned char *ptr;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 94b306f6d551..fbd98ac853ea 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -426,7 +426,8 @@ int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!dn_db->router) {
dn_db->router = neigh_clone(neigh);
} else {
- if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
+ if (msg->priority > container_of(dn_db->router,
+ struct dn_neigh, n)->priority)
neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
}
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 7e85f2a1ae25..d1d78a463a06 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1120,7 +1120,7 @@ source_ok:
/* Ok then, we assume its directly connected and move on */
select_source:
if (neigh)
- gateway = ((struct dn_neigh *)neigh)->addr;
+ gateway = container_of(neigh, struct dn_neigh, n)->addr;
if (gateway == 0)
gateway = fld.daddr;
if (fld.saddr == 0) {
@@ -1429,7 +1429,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
/* Use the default router if there is one */
neigh = neigh_clone(dn_db->router);
if (neigh) {
- gateway = ((struct dn_neigh *)neigh)->addr;
+ gateway = container_of(neigh, struct dn_neigh, n)->addr;
goto make_route;
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 32d472a82241..bdccb613285d 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -451,6 +451,7 @@ out_rollback_unoffload:
switchdev_bridge_port_unoffload(brport_dev, dp,
&dsa_slave_switchdev_notifier,
&dsa_slave_switchdev_blocking_notifier);
+ dsa_flush_workqueue();
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -1620,8 +1621,10 @@ int dsa_port_link_register_of(struct dsa_port *dp)
if (ds->ops->phylink_mac_link_down)
ds->ops->phylink_mac_link_down(ds, port,
MLO_AN_FIXED, PHY_INTERFACE_MODE_NA);
+ of_node_put(phy_np);
return dsa_port_phylink_register(dp);
}
+ of_node_put(phy_np);
return 0;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 41c69a6e7854..8022d50584db 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -285,7 +285,7 @@ static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
if (other_dp->slave->flags & IFF_ALLMULTI)
flags.val |= BR_MCAST_FLOOD;
if (other_dp->slave->flags & IFF_PROMISC)
- flags.val |= BR_FLOOD;
+ flags.val |= BR_FLOOD | BR_MCAST_FLOOD;
}
err = dsa_port_pre_bridge_flags(dp, flags, NULL);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2ad3c7b42d6d..1d9e6d5e9a76 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2507,11 +2508,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
/* decrease mem now to avoid the memleak warning */
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 17440840a791..a5d57fa679ca 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -504,7 +504,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -726,15 +726,17 @@ EXPORT_SYMBOL_GPL(inet_unhash);
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
* property might be used by clever attacker.
- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
- * we use 256 instead to really give more isolation and
- * privacy, this only consumes 1 KB of kernel memory.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks were since demonstrated, thus we use 65536 instead to really
+ * give more isolation and privacy, at the expense of 256kB of kernel
+ * memory.
*/
-#define INET_TABLE_PERTURB_SHIFT 8
-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+#define INET_TABLE_PERTURB_SHIFT 16
+#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT)
+static u32 *table_perturb;
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
@@ -774,10 +776,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- net_get_random_once(table_perturb, sizeof(table_perturb));
- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+ net_get_random_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -831,11 +836,12 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- /* If our first attempt found a candidate, skip next candidate
- * in 1/16 of cases to add some noise.
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random here so that
+ * on low contention the randomness is maximal and on high contention
+ * it may be inexistent.
*/
- if (!i && !(prandom_u32() % 16))
- i = 2;
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
@@ -859,7 +865,7 @@ ok:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -909,6 +915,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
low_limit,
high_limit);
init_hashinfo_lhash2(h);
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+ sizeof(*table_perturb), GFP_KERNEL);
+ if (!table_perturb)
+ panic("TCP: failed to alloc table_perturb");
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 9e0bbd026560..0ec501845cb3 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
spin_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- /* Cache inet_bhashfn(), because 'struct net' might be no longer
- * available later in inet_twsk_kill().
- */
- tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
- hashinfo->bhash_size);
- bhead = &hashinfo->bhash[tw->tw_bslot];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+
+void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+{
+ struct inet_timewait_sock *tw;
+ struct sock *sk;
+ struct hlist_nulls_node *node;
+ unsigned int slot;
+
+ for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+restart_rcu:
+ cond_resched();
+ rcu_read_lock();
+restart:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_state != TCP_TIME_WAIT)
+ continue;
+ tw = inet_twsk(sk);
+ if ((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))
+ continue;
+
+ if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ continue;
+
+ if (unlikely((tw->tw_family != family) ||
+ refcount_read(&twsk_net(tw)->ns.count))) {
+ inet_twsk_put(tw);
+ goto restart;
+ }
+
+ rcu_read_unlock();
+ local_bh_disable();
+ inet_twsk_deschedule_put(tw);
+ local_bh_enable();
+ goto restart_rcu;
+ }
+ /* If the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto restart;
+ rcu_read_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 365caebf51ab..aacee9dd771b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -459,14 +459,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -504,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -581,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ /dev/null
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 3ee947557b88..aa9a11b20d18 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -305,6 +305,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int chk_addr_ret;
if (addr_len < sizeof(*addr))
@@ -318,7 +319,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
addr->sin_addr.s_addr,
@@ -355,6 +357,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
return -ENODEV;
}
}
+
+ if (!dev && sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
scoped);
rcu_read_unlock();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98c6f3429593..ed01063d8f30 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1726,6 +1726,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
struct rtable *rth;
+ bool no_policy;
u32 itag = 0;
int err;
@@ -1736,8 +1737,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (our)
flags |= RTCF_LOCAL;
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
return -ENOBUFS;
@@ -1753,6 +1758,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
RT_CACHE_STAT_INC(in_slow_mc);
+ skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
return 0;
}
@@ -1795,7 +1801,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct rtable *rth;
int err;
struct in_device *out_dev;
- bool do_cache;
+ bool do_cache, no_policy;
u32 itag = 0;
/* get a working reference to the output device */
@@ -1840,6 +1846,10 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
@@ -1852,8 +1862,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- rth = rt_dst_alloc(out_dev->dev, 0, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY),
+ rth = rt_dst_alloc(out_dev->dev, 0, res->type, no_policy,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
err = -ENOBUFS;
@@ -2228,6 +2237,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct rtable *rth;
struct flowi4 fl4;
bool do_cache = true;
+ bool no_policy;
/* IP on this device is disabled. */
@@ -2346,6 +2356,10 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
+ no_policy = IN_DEV_ORCONF(in_dev, NOPOLICY);
+ if (no_policy)
+ IPCB(skb)->flags |= IPSKB_NOPOLICY;
+
do_cache &= res->fi && !itag;
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
@@ -2360,7 +2374,7 @@ local_input:
rth = rt_dst_alloc(ip_rt_get_dev(net, res),
flags | RTCF_LOCAL, res->type,
- IN_DEV_ORCONF(in_dev, NOPOLICY), false);
+ no_policy, false);
if (!rth)
goto e_nobufs;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2cb3b852d148..f33c31dd7366 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -281,6 +281,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
+ const struct tcp_request_sock_ops *af_ops,
struct sock *sk,
struct sk_buff *skb)
{
@@ -297,6 +298,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
return NULL;
treq = tcp_rsk(req);
+
+ /* treq->af_specific might be used to perform TCP_MD5 lookup */
+ treq->af_specific = af_ops;
+
treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
#if IS_ENABLED(CONFIG_MPTCP)
treq->is_mptcp = sk_is_mptcp(sk);
@@ -364,7 +369,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
+ &tcp_request_sock_ipv4_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2088f93fa37b..60f99e9fb6d1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3867,7 +3867,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -5454,7 +5455,17 @@ static void tcp_new_space(struct sock *sk)
INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
/* pairs with tcp_poll() */
smp_mb();
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f9cec624068d..457f5b5d5d4a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
+ inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
list_for_each_entry(net, net_exit_list, exit_list)
tcp_fastopen_ctx_destroy(net);
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6366df7aaf2a..6854bb1fb32b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -531,7 +531,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9ede847f4199..1ca2f28c9981 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -82,6 +82,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index fbab921670cc..9a8e014d9b5b 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -74,27 +74,32 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered_ce = scb->tx.delivered_ce;
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 4740afecf7c6..32ccac10bd62 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -308,7 +308,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 976236736146..5136959b3dc5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -724,6 +724,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -739,7 +740,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
int tun_hlen;
tun_info = skb_tunnel_info_txcheck(skb);
@@ -766,19 +766,19 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
return -ENOMEM;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ flags = tunnel->parms.o_flags;
+
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1056,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 909f937befd7..7f695c39d9a8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -460,10 +460,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
newpsl->sl_addr[i] = psl->sl_addr[i];
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
+ rcu_assign_pointer(pmc->sflist, newpsl);
+ kfree_rcu(psl, rcu);
psl = newpsl;
- rcu_assign_pointer(pmc->sflist, psl);
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
for (i = 0; i < psl->sl_count; i++) {
@@ -565,12 +565,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
psl->sl_count, psl->sl_addr, 0);
atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
&sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
}
- mutex_unlock(&idev->mc_lock);
rcu_assign_pointer(pmc->sflist, newpsl);
+ mutex_unlock(&idev->mc_lock);
+ kfree_rcu(psl, rcu);
pmc->sfmode = gsf->gf_fmode;
err = 0;
done:
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1da332450d98..8ce60ab89015 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,14 +24,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
+ struct net_device *dev = skb_dst(skb)->dev;
struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
- .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -39,6 +38,13 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
};
int err;
+ if (sk && sk->sk_bound_dev_if)
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ else if (strict)
+ fl6.flowi6_oif = dev->ifindex;
+ else
+ fl6.flowi6_oif = l3mdev_master_ifindex(dev);
+
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d1b61d00368e..9cc123f000fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -170,7 +170,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb);
+ req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
+ &tcp_request_sock_ipv6_ops, sk, skb);
if (!req)
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 13678d3908fa..faaddaf43c90 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2207,9 +2207,15 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
+static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
+{
+ inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+}
+
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
+ .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fd51db3be91c..339d95df19d3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2826,8 +2826,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
- BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+ err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+ BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+ if (err)
+ return err;
memset(ext_hdrs, 0, sizeof(ext_hdrs));
err = parse_exthdrs(skb, hdr, ext_hdrs);
@@ -2898,7 +2900,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2916,7 +2918,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg)))
+ if (!(ealg_tmpl_set(t, ealg) && ealg->available))
continue;
for (k = 1; ; k++) {
@@ -2927,7 +2929,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg))
+ if (aalg_tmpl_set(t, aalg) && aalg->available)
sz += sizeof(struct sadb_comb);
}
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1b30c724ca8d..dc8aec1a5d3d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3657,6 +3657,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
cbss->transmitted_bss->bssid);
bss_conf->bssid_indicator = cbss->max_bssid_indicator;
bss_conf->bssid_index = cbss->bssid_index;
+ } else {
+ bss_conf->nontransmitted = false;
+ memset(bss_conf->transmitter_bssid, 0,
+ sizeof(bss_conf->transmitter_bssid));
+ bss_conf->bssid_indicator = 0;
+ bss_conf->bssid_index = 0;
}
/*
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index beb6b92eb780..88d797fa82ff 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1405,8 +1405,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
goto dont_reorder;
/* not part of a BA session */
- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
goto dont_reorder;
/* new, potentially un-ordered, ampdu frame - process it */
diff --git a/net/mctp/device.c b/net/mctp/device.c
index f49be882e98e..99a3bda8852f 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
if (mdev && refcount_dec_and_test(&mdev->refs)) {
+ kfree(mdev->addrs);
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
mctp_route_remove_dev(mdev);
mctp_neigh_remove_dev(mdev);
- kfree(mdev->addrs);
mctp_dev_put(mdev);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 325383646f5c..b548cec86c9d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -107,7 +107,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
}
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
@@ -221,7 +221,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
- mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum = get_unaligned((__force __sum16 *)ptr);
ptr += 2;
}
@@ -1240,7 +1240,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1256,15 +1256,25 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
header.csum = 0;
csum = csum_partial(&header, sizeof(header), sum);
- return (__force u16)csum_fold(csum);
+ return csum_fold(csum);
}
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static __sum16 mptcp_make_csum(const struct mptcp_ext *mpext)
{
return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
~csum_unfold(mpext->csum));
}
+static void put_len_csum(u16 len, __sum16 csum, void *data)
+{
+ __sum16 *sumptr = data + 2;
+ __be16 *ptr = data;
+
+ put_unaligned_be16(len, ptr);
+
+ put_unaligned(csum, sumptr);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
@@ -1340,8 +1350,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
- put_unaligned_be32(mpext->data_len << 16 |
- mptcp_make_csum(mpext), ptr);
+ put_len_csum(mpext->data_len,
+ mptcp_make_csum(mpext),
+ ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
@@ -1392,11 +1403,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
goto mp_capable_done;
if (opts->csum_reqd) {
- put_unaligned_be32(opts->data_len << 16 |
- __mptcp_make_csum(opts->data_seq,
- opts->subflow_seq,
- opts->data_len,
- ~csum_unfold(opts->csum)), ptr);
+ put_len_csum(opts->data_len,
+ __mptcp_make_csum(opts->data_seq,
+ opts->subflow_seq,
+ opts->data_len,
+ ~csum_unfold(opts->csum)),
+ ptr);
} else {
put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 01809eef29b4..aa51b100e033 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -178,14 +178,13 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = (ssk->sk_state == TCP_CLOSE) &&
- (subflow->request_join || subflow->mp_join);
+ update_subflows = subflow->request_join || subflow->mp_join;
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
spin_lock_bh(&pm->lock);
if (update_subflows)
- pm->subflows--;
+ __mptcp_pm_close_subflow(msk);
/* Even if this subflow is not really established, tell the PM to try
* to pick the next ones, if possible.
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3c1a3036550f..5655a63aa6a8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -443,7 +443,8 @@ struct mptcp_subflow_context {
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1; /* local_id is correctly initialized */
+ local_id_valid : 1, /* local_id is correctly initialized */
+ valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -723,7 +724,7 @@ void mptcp_token_destroy(struct mptcp_sock *msk);
void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
-u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
+__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
@@ -833,6 +834,20 @@ unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk);
+/* called under PM lock */
+static inline void __mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ if (--msk->pm.subflows < mptcp_pm_get_subflows_max(msk))
+ WRITE_ONCE(msk->pm.accept_subflow, true);
+}
+
+static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
+{
+ spin_lock_bh(&msk->pm.lock);
+ __mptcp_pm_close_subflow(msk);
+ spin_unlock_bh(&msk->pm.lock);
+}
+
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index aba260f547da..be76ada89d96 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -888,7 +888,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
u32 offset, seq, delta;
- u16 csum;
+ __sum16 csum;
int len;
if (!csum_reqd)
@@ -955,11 +955,14 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ if (subflow->mp_join || subflow->valid_csum_seen) {
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ }
return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
@@ -1141,6 +1144,18 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1218,7 +1233,7 @@ fallback:
return true;
}
- if (subflow->mp_join || subflow->fully_established) {
+ if (!subflow_can_fallback(subflow)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1422,20 +1437,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sockaddr_storage addr;
int remote_id = remote->id;
int local_id = loc->id;
+ int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
- int err;
if (!mptcp_is_fully_established(sk))
- return -ENOTCONN;
+ goto err_out;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
- return err;
+ goto err_out;
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1492,6 +1507,12 @@ failed_unlink:
failed:
subflow->disposable = 1;
sock_release(sf);
+
+err_out:
+ /* we account subflows before the creation, and this failures will not
+ * be caught by sk_state_change()
+ */
+ mptcp_pm_close_subflow(msk);
return err;
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 2c467c422dc6..fb67f1ca2495 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1495,7 +1495,7 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8ec55cd72572..204a5cdff5b1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -556,24 +556,14 @@ static bool tcp_in_window(struct nf_conn *ct,
}
}
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
+ } else if (tcph->syn &&
+ after(end, sender->td_end) &&
+ (state->state == TCP_CONNTRACK_SYN_SENT ||
+ state->state == TCP_CONNTRACK_SYN_RECV)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, dataoff, tcph, sender);
- } else if (tcph->syn && dir == IP_CT_DIR_REPLY &&
- state->state == TCP_CONNTRACK_SYN_SENT) {
- /* Retransmitted syn-ack, or syn (simultaneous open).
*
* Re-init state for this direction, just like for the first
* syn(-ack) reply, it might differ in seq, ack or tcp options.
@@ -581,7 +571,8 @@ static bool tcp_in_window(struct nf_conn *ct,
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
end, win);
- if (!tcph->ack)
+
+ if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return true;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3e1afd10a9b6..55aa55b252b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -823,7 +823,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
-#if IS_ENABLED(CONFIG_NFT_FLOW_OFFLOAD)
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
.procname = "nf_flowtable_udp_timeout",
.maxlen = sizeof(unsigned int),
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 3db256da919b..f2def06d1070 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -179,12 +179,11 @@ EXPORT_SYMBOL_GPL(flow_offload_route_init);
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
- tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0;
}
-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+static void flow_offload_fixup_ct(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
@@ -193,7 +192,9 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ flow_offload_fixup_tcp(&ct->proto.tcp);
+
+ timeout = tn->timeouts[ct->proto.tcp.state];
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
@@ -211,18 +212,6 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
}
-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
-{
- if (nf_ct_protonum(ct) == IPPROTO_TCP)
- flow_offload_fixup_tcp(&ct->proto.tcp);
-}
-
-static void flow_offload_fixup_ct(struct nf_conn *ct)
-{
- flow_offload_fixup_ct_state(ct);
- flow_offload_fixup_ct_timeout(ct);
-}
-
static void flow_offload_route_release(struct flow_offload *flow)
{
nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
@@ -335,8 +324,10 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (READ_ONCE(flow->timeout) != timeout)
+ if (timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
+ else
+ return;
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
@@ -359,22 +350,14 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
-
- clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
-
- if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(flow->ct);
- else
- flow_offload_fixup_ct_timeout(flow->ct);
-
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
set_bit(NF_FLOW_TEARDOWN, &flow->flags);
-
- flow_offload_fixup_ct_state(flow->ct);
+ flow_offload_fixup_ct(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -438,33 +421,12 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
-static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
-{
- struct dst_entry *dst;
-
- if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
- tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
- dst = tuple->dst_cache;
- if (!dst_check(dst, tuple->dst_cookie))
- return true;
- }
-
- return false;
-}
-
-static bool nf_flow_has_stale_dst(struct flow_offload *flow)
-{
- return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
- flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
-}
-
static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct) ||
- nf_flow_has_stale_dst(flow))
- set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+ nf_ct_is_dying(flow->ct))
+ flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
if (test_bit(NF_FLOW_HW, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 32c0eb1b4821..b350fe9d00b0 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -248,6 +248,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
+{
+ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
+ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
+ return true;
+
+ return dst_check(tuple->dst_cache, tuple->dst_cookie);
+}
+
static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
const struct nf_hook_state *state,
struct dst_entry *dst)
@@ -367,6 +376,11 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
@@ -624,6 +638,11 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
return NF_ACCEPT;
+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
+ flow_offload_teardown(flow);
+ return NF_ACCEPT;
+ }
+
if (skb_try_make_writable(skb, thoff + hdrsize))
return NF_DROP;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 16c3a39689f4..a096b9fbbbdf 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8342,16 +8342,7 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static bool nft_expr_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
- if (!expr->ops->reduce) {
- pr_warn_once("missing reduce for expression %s ",
- expr->ops->type->name);
- return false;
- }
-
- if (nft_reduce_is_readonly(expr))
- return false;
-
- return expr->ops->reduce(track, expr);
+ return false;
}
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 900d48c810a1..6f0b07fe648d 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -36,6 +36,15 @@ static void nft_default_forward_path(struct nf_flow_route *route,
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
}
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+ return false;
+
+ return true;
+}
+
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
const struct dst_entry *dst_cache,
const struct nf_conn *ct,
@@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n;
u8 nud_state;
+ if (!nft_is_valid_ether_device(dev))
+ goto out;
+
n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
return -1;
@@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
if (!(nud_state & NUD_VALID))
return -1;
+out:
return dev_fill_forward_path(dev, ha, stack);
}
@@ -78,15 +91,6 @@ struct nft_forward_info {
enum flow_offload_xmit_type xmit_type;
};
-static bool nft_is_valid_ether_device(const struct net_device *dev)
-{
- if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
- dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
- return false;
-
- return true;
-}
-
static void nft_dev_path_info(const struct net_device_path_stack *stack,
struct nft_forward_info *info,
unsigned char *ha, struct nf_flowtable *flowtable)
@@ -119,7 +123,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
info->indev = NULL;
break;
}
- info->outdev = path->dev;
+ if (!info->outdev)
+ info->outdev = path->dev;
info->encap[info->num_encaps].id = path->encap.id;
info->encap[info->num_encaps].proto = path->encap.proto;
info->num_encaps++;
@@ -293,7 +298,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
case IPPROTO_TCP:
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph);
- if (unlikely(!tcph || tcph->fin || tcph->rst))
+ if (unlikely(!tcph || tcph->fin || tcph->rst ||
+ !nf_conntrack_tcp_established(ct)))
goto out;
break;
case IPPROTO_UDP:
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index d600a566da32..7325bee7d144 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -349,7 +349,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*ext = &rbe->ext;
return -EEXIST;
} else {
- p = &parent->rb_left;
+ overlap = false;
+ if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 6d9e8e0a3a7d..05ae5a338b6f 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -54,6 +54,32 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
}
#endif
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -67,20 +93,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -224,6 +237,16 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
return nft_expr_reduce_bitwise(track, expr);
}
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -231,6 +254,7 @@ static const struct nft_expr_ops nft_socket_ops = {
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
.reduce = nft_socket_reduce,
};
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 05a3795eac8e..73e9c0a9c187 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = len;
}
- skb_reset_transport_header(data_skb);
err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc7a2404efdf..5b286e1e0a6f 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -38,7 +38,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -94,7 +94,7 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -142,7 +142,7 @@ int nfc_dev_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -207,7 +207,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -246,7 +246,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -291,7 +291,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -335,7 +335,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -401,7 +401,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -448,7 +448,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -495,7 +495,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
kfree_skb(skb);
goto error;
@@ -552,7 +552,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -601,7 +601,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -1134,6 +1134,7 @@ int nfc_register_device(struct nfc_dev *dev)
dev->rfkill = NULL;
}
}
+ dev->shutting_down = false;
device_unlock(&dev->dev);
rc = nfc_genl_device_added(dev);
@@ -1166,12 +1167,10 @@ void nfc_unregister_device(struct nfc_dev *dev)
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
}
+ dev->shutting_down = true;
device_unlock(&dev->dev);
if (dev->ops->check_presence) {
- device_lock(&dev->dev);
- dev->shutting_down = true;
- device_unlock(&dev->dev);
del_timer_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
}
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 6055dc9a82aa..aa5e712adf07 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -118,7 +118,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
skb_frag = nci_skb_alloc(ndev,
(NCI_DATA_HDR_SIZE + frag_len),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (skb_frag == NULL) {
rc = -ENOMEM;
goto free_exit;
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 19703a649b5a..78c4b6addf15 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -153,7 +153,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
i = 0;
skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -184,7 +184,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
if (i < data_len) {
skb = nci_skb_alloc(ndev,
conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f184b0db79d4..7c62417ccfd7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1244,7 +1244,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
return -ENOMEM;
@@ -1260,7 +1260,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
genlmsg_end(msg, hdr);
- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5327d130c4b5..73ee2771093d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -487,14 +487,27 @@ struct rds_tcp_net {
/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation.
*/
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
+ /* TCP timer functions might access net namespace even after
+ * a process which created this net namespace terminated.
+ */
+ if (!sk->sk_net_refcnt) {
+ if (!maybe_get_net(net)) {
+ release_sock(sk);
+ return false;
+ }
+ sk->sk_net_refcnt = 1;
+ netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+ }
+ rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -504,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
+ return true;
}
static void rds_tcp_accept_worker(struct work_struct *work)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index dc8d745d6857..f8b5930d7b34 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -49,7 +49,7 @@ struct rds_tcp_statistics {
};
/* tcp.c */
-void rds_tcp_tune(struct socket *sock);
+bool rds_tcp_tune(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 5461d77fff4f..f0c477c5d1db 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
if (ret < 0)
goto out;
- rds_tcp_tune(sock);
+ if (!rds_tcp_tune(sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (isv6) {
sin6.sin6_family = AF_INET6;
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 09cadd556d1e..7edf2e69d3fe 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
__module_get(new_sock->ops->owner);
rds_tcp_keepalive(new_sock);
- rds_tcp_tune(new_sock);
+ if (!rds_tcp_tune(new_sock)) {
+ ret = -EINVAL;
+ goto out;
+ }
inet = inet_sk(new_sock->sk);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a4111408ffd0..6a1611b0e303 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -117,6 +117,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
local, srx->transport_type, srx->transport.family);
udp_conf.family = srx->transport.family;
+ udp_conf.use_udp_checksums = true;
if (udp_conf.family == AF_INET) {
udp_conf.local_ip = srx->transport.sin.sin_addr;
udp_conf.local_udp_port = srx->transport.sin.sin_port;
@@ -124,6 +125,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
} else {
udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+ udp_conf.use_udp6_tx_checksums = true;
+ udp_conf.use_udp6_rx_checksums = true;
#endif
}
ret = udp_sock_create(net, &udp_conf, &local->socket);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 31fcd279c177..211c757bfc3c 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *pattr;
struct tcf_pedit *p;
int ret = 0, err;
- int ksize;
+ int i, ksize;
u32 index;
if (!nla) {
@@ -228,6 +228,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+ p->tcfp_off_max_hint = 0;
+ for (i = 0; i < p->tcfp_nkeys; ++i) {
+ u32 cur = p->tcfp_keys[i].off;
+
+ /* sanitize the shift value for any later use */
+ p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+ p->tcfp_keys[i].shift);
+
+ /* The AT option can read a single byte, we can bound the actual
+ * value with uchar max.
+ */
+ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+ /* Each key touches 4 bytes starting from the computed offset */
+ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ }
p->tcfp_flags = parm->flags;
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -308,13 +324,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
+ u32 max_offset;
int i;
- if (skb_unclone(skb, GFP_ATOMIC))
- return p->tcf_action;
-
spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ?
+ skb_transport_offset(skb) :
+ skb_network_offset(skb)) +
+ p->tcfp_off_max_hint;
+ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+ goto unlock;
+
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
@@ -403,6 +424,7 @@ bad:
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
+unlock:
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b3815b568e8e..463c4a58d2c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -458,6 +458,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fc7b6eb22143..fce16b9d6e1a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -243,11 +243,27 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
+static void smc_fback_restore_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = NULL;
+
+ smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
+ smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
+ smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
+ smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
+}
+
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
smc->clcsock->file->private_data = smc->sk.sk_socket;
smc->clcsock->file = NULL;
+ smc_fback_restore_callbacks(smc);
}
}
@@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
+ smc_init_saved_callbacks(smc);
return sk;
}
@@ -744,47 +761,73 @@ out:
static void smc_fback_state_change(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_state_change);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_data_ready(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_data_ready);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_write_space(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_write_space);
+ read_unlock_bh(&clcsk->sk_callback_lock);
}
static void smc_fback_error_report(struct sock *clcsk)
{
- struct smc_sock *smc =
- smc_clcsock_user_data(clcsk);
+ struct smc_sock *smc;
- if (!smc)
- return;
- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
+ read_lock_bh(&clcsk->sk_callback_lock);
+ smc = smc_clcsock_user_data(clcsk);
+ if (smc)
+ smc_fback_forward_wakeup(smc, clcsk,
+ smc->clcsk_error_report);
+ read_unlock_bh(&clcsk->sk_callback_lock);
+}
+
+static void smc_fback_replace_callbacks(struct smc_sock *smc)
+{
+ struct sock *clcsk = smc->clcsock->sk;
+
+ write_lock_bh(&clcsk->sk_callback_lock);
+ clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+
+ smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
+ &smc->clcsk_state_change);
+ smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
+ &smc->clcsk_data_ready);
+ smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
+ &smc->clcsk_write_space);
+ smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
+ &smc->clcsk_error_report);
+
+ write_unlock_bh(&clcsk->sk_callback_lock);
}
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
- struct sock *clcsk;
int rc = 0;
mutex_lock(&smc->clcsock_release_lock);
@@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
rc = -EBADF;
goto out;
}
- clcsk = smc->clcsock->sk;
- if (smc->use_fallback)
- goto out;
smc->use_fallback = true;
smc->fallback_rsn = reason_code;
smc_stat_fallback(smc);
@@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
* in smc sk->sk_wq and they should be woken up
* as clcsock's wait queue is woken up.
*/
- smc->clcsk_state_change = clcsk->sk_state_change;
- smc->clcsk_data_ready = clcsk->sk_data_ready;
- smc->clcsk_write_space = clcsk->sk_write_space;
- smc->clcsk_error_report = clcsk->sk_error_report;
-
- clcsk->sk_state_change = smc_fback_state_change;
- clcsk->sk_data_ready = smc_fback_data_ready;
- clcsk->sk_write_space = smc_fback_write_space;
- clcsk->sk_error_report = smc_fback_error_report;
-
- smc->clcsock->sk->sk_user_data =
- (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_fback_replace_callbacks(smc);
}
out:
mutex_unlock(&smc->clcsock_release_lock);
@@ -1475,6 +1504,8 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_state = SMC_CLOSED;
if (rc == -EPIPE || rc == -EAGAIN)
smc->sk.sk_err = EPIPE;
+ else if (rc == -ECONNREFUSED)
+ smc->sk.sk_err = ECONNREFUSED;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
sock_put(&smc->sk); /* passive closing */
@@ -1594,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
* function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
+
+ /* if new clcsock has also inherited the fallback-specific callback
+ * functions, switch them back to the original ones.
+ */
+ if (lsmc->use_fallback) {
+ if (lsmc->clcsk_state_change)
+ new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
+ if (lsmc->clcsk_write_space)
+ new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
+ if (lsmc->clcsk_error_report)
+ new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
+ }
+
(*new_smc)->clcsock = new_clcsock;
out:
return rc;
@@ -2353,17 +2397,20 @@ out:
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
- struct smc_sock *lsmc =
- smc_clcsock_user_data(listen_clcsock);
+ struct smc_sock *lsmc;
+ read_lock_bh(&listen_clcsock->sk_callback_lock);
+ lsmc = smc_clcsock_user_data(listen_clcsock);
if (!lsmc)
- return;
+ goto out;
lsmc->clcsk_data_ready(listen_clcsock);
if (lsmc->sk.sk_state == SMC_LISTEN) {
sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
sock_put(&lsmc->sk);
}
+out:
+ read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
static int smc_listen(struct socket *sock, int backlog)
@@ -2395,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog)
/* save original sk_data_ready function and establish
* smc-specific sk_data_ready function
*/
- smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
- smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
+ smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
+ smc_clcsock_data_ready, &smc->clcsk_data_ready);
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
/* save original ops */
smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
@@ -2413,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog)
rc = kernel_listen(smc->clcsock, backlog);
if (rc) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
+ smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
goto out;
}
sk->sk_max_ack_backlog = backlog;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ea0620529ebe..5ed765ea0c73 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
+static inline void smc_init_saved_callbacks(struct smc_sock *smc)
+{
+ smc->clcsk_state_change = NULL;
+ smc->clcsk_data_ready = NULL;
+ smc->clcsk_write_space = NULL;
+ smc->clcsk_error_report = NULL;
+}
+
static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
{
return (struct smc_sock *)
((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
}
+/* save target_cb in saved_cb, and replace target_cb with new_cb */
+static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
+ void (*new_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ /* only save once */
+ if (!*saved_cb)
+ *saved_cb = *target_cb;
+ *target_cb = new_cb;
+}
+
+/* restore target_cb to saved_cb, and reset saved_cb to NULL */
+static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
+ void (**saved_cb)(struct sock *))
+{
+ if (!*saved_cb)
+ return;
+ *target_cb = *saved_cb;
+ *saved_cb = NULL;
+}
+
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 676cb2333d3c..31db7438857c 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -214,8 +214,11 @@ again:
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
+ write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
+ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
+ &smc->clcsk_data_ready);
smc->clcsock->sk->sk_user_data = NULL;
+ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
smc_close_cleanup_listen(sk);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 51e8eb2933ff..338b9ef806e8 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 61c276bddaf2..f549e4c05def 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -98,6 +98,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
* done without the correct namespace:
*/
.flags = RPC_CLNT_CREATE_NOPING |
+ RPC_CLNT_CREATE_CONNECTED |
RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
};
struct rpc_clnt *clnt;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index af0174d7ce5a..e2c6eca0271b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -76,6 +76,7 @@ static int rpc_encode_header(struct rpc_task *task,
static int rpc_decode_header(struct rpc_task *task,
struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);
+static int rpc_ping_noreply(struct rpc_clnt *clnt);
static void rpc_check_timeout(struct rpc_task *task);
static void rpc_register_client(struct rpc_clnt *clnt)
@@ -483,6 +484,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
rpc_shutdown_client(clnt);
return ERR_PTR(err);
}
+ } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) {
+ int err = rpc_ping_noreply(clnt);
+ if (err != 0) {
+ rpc_shutdown_client(clnt);
+ return ERR_PTR(err);
+ }
}
clnt->cl_softrtry = 1;
@@ -1065,10 +1072,13 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt)
static
void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
{
- if (task->tk_xprt &&
- !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
- (task->tk_flags & RPC_TASK_MOVEABLE)))
- return;
+ if (task->tk_xprt) {
+ if (!(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE)))
+ return;
+ xprt_release(task);
+ xprt_put(task->tk_xprt);
+ }
if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
task->tk_xprt = rpc_task_get_first_xprt(clnt);
else
@@ -2706,6 +2716,10 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
+static const struct rpc_procinfo rpcproc_null_noreply = {
+ .p_encode = rpcproc_encode_null,
+};
+
static void
rpc_null_call_prepare(struct rpc_task *task, void *data)
{
@@ -2759,6 +2773,28 @@ static int rpc_ping(struct rpc_clnt *clnt)
return status;
}
+static int rpc_ping_noreply(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_null_noreply,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_null_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8ab64ea46870..650102a9c86a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1419,6 +1419,26 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
+ * xs_local_state_change - callback to handle AF_LOCAL socket state changes
+ * @sk: socket whose state has changed
+ *
+ */
+static void xs_local_state_change(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+
+ if (!(xprt = xprt_from_sock(sk)))
+ return;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+ if (sk->sk_shutdown & SHUTDOWN_MASK) {
+ clear_bit(XPRT_CONNECTED, &xprt->state);
+ /* Trigger the socket release */
+ xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
+ }
+}
+
+/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
*
@@ -1866,6 +1886,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_data_ready;
sk->sk_write_space = xs_udp_write_space;
+ sk->sk_state_change = xs_local_state_change;
sk->sk_error_report = xs_error_report;
xprt_clear_connected(xprt);
@@ -1950,6 +1971,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret;
+ if (transport->file)
+ goto force_disconnect;
+
if (RPC_IS_ASYNC(task)) {
/*
* We want the AF_LOCAL connect to be resolved in the
@@ -1962,11 +1986,17 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
*/
task->tk_rpc_status = -ENOTCONN;
rpc_exit(task, -ENOTCONN);
- return;
+ goto out_wake;
}
ret = xs_local_setup_socket(transport);
if (ret && !RPC_IS_SOFTCONN(task))
msleep_interruptible(15000);
+ return;
+force_disconnect:
+ xprt_force_disconnect(xprt);
+out_wake:
+ xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
@@ -2845,9 +2875,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 12f7b56771d9..3919fe2c58c5 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -483,11 +483,13 @@ handle_error:
copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
copy = min_t(size_t, copy, (max_open_record_len - record->len));
- rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
- if (rc)
- goto handle_error;
- tls_append_frag(record, pfrag, copy);
+ if (copy) {
+ rc = tls_device_copy_data(page_address(pfrag->page) +
+ pfrag->offset, copy, msg_iter);
+ if (rc)
+ goto handle_error;
+ tls_append_frag(record, pfrag, copy);
+ }
size -= copy;
if (!size) {
@@ -1345,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev)
/* Device contexts for RX and TX will be freed in on sk_destruct
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
*/
+ if (refcount_dec_and_test(&ctx->refcount))
+ tls_device_free_ctx(ctx);
}
up_write(&device_offload_lock);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 21e808fcb676..1a3551b6d18b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3173,6 +3173,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
+ if (chandef->chan->band == NL80211_BAND_S1GHZ) {
+ /* User input error for channel width doesn't match channel */
+ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ attrs[NL80211_ATTR_CHANNEL_WIDTH],
+ "bad channel width");
+ return -EINVAL;
+ }
+ }
if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
@@ -11657,18 +11666,23 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
struct cfg80211_bitrate_mask mask;
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
+ wdev_lock(wdev);
err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
NL80211_ATTR_TX_RATES, &mask,
dev, true);
if (err)
- return err;
+ goto out;
- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+out:
+ wdev_unlock(wdev);
+ return err;
}
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4a6d86432910..6d82bd9eaf8c 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1829,7 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
if (tmp && tmp->datalen >= sizeof(struct ieee80211_s1g_oper_ie)) {
struct ieee80211_s1g_oper_ie *s1gop = (void *)tmp->data;
- return s1gop->primary_ch;
+ return s1gop->oper_ch;
}
} else {
tmp = cfg80211_find_elem(WLAN_EID_DS_PARAMS, ie, ielen);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2c34caee0fd1..3a9348030e20 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -639,7 +639,7 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
if (sk_can_busy_loop(sk))
sk_busy_loop(sk, 1); /* only support non-blocking sockets */
- if (xsk_no_wakeup(sk))
+ if (xs->zc && xsk_no_wakeup(sk))
return 0;
pool = xs->pool;
@@ -967,6 +967,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xp_get_pool(umem_xs->pool);
xs->pool = umem_xs->pool;
+
+ /* If underlying shared umem was created without Tx
+ * ring, allocate Tx descs array that Tx batching API
+ * utilizes
+ */
+ if (xs->tx && !xs->pool->tx_descs) {
+ err = xp_alloc_tx_descs(xs->pool, xs);
+ if (err) {
+ xp_put_pool(xs->pool);
+ sockfd_put(sock);
+ goto out_unlock;
+ }
+ }
}
xdp_get_umem(umem_xs->umem);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index af040ffa14ff..87bdd71c7bb6 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -42,6 +42,16 @@ void xp_destroy(struct xsk_buff_pool *pool)
kvfree(pool);
}
+int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs)
+{
+ pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs),
+ GFP_KERNEL);
+ if (!pool->tx_descs)
+ return -ENOMEM;
+
+ return 0;
+}
+
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
@@ -59,11 +69,9 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (!pool->heads)
goto out;
- if (xs->tx) {
- pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL);
- if (!pool->tx_descs)
+ if (xs->tx)
+ if (xp_alloc_tx_descs(pool, xs))
goto out;
- }
pool->chunk_mask = ~((u64)umem->chunk_size - 1);
pool->addrs_cnt = umem->size;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 00bd0ecff5a1..f1876ea61fdc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3744,7 +3744,7 @@ static int stale_bundle(struct dst_entry *dst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
- dst->dev = dev_net(dev)->loopback_dev;
+ dst->dev = blackhole_netdev;
dev_hold(dst->dev);
dev_put(dev);
}