summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c7
-rw-r--r--net/Makefile2
-rw-r--r--net/batman-adv/fragmentation.c4
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/bluetooth/6lowpan.c1
-rw-r--r--net/bluetooth/bnep/core.c3
-rw-r--r--net/bluetooth/cmtp/core.c3
-rw-r--r--net/bluetooth/hci_conn.c2
-rw-r--r--net/bluetooth/hci_core.c60
-rw-r--r--net/bluetooth/hci_event.c36
-rw-r--r--net/bluetooth/hidp/core.c3
-rw-r--r--net/bluetooth/l2cap_core.c5
-rw-r--r--net/bluetooth/mgmt.c99
-rw-r--r--net/bluetooth/smp.c5
-rw-r--r--net/ceph/auth_x.c76
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c21
-rw-r--r--net/ceph/messenger.c34
-rw-r--r--net/ceph/osd_client.c118
-rw-r--r--net/core/dev.c175
-rw-r--r--net/core/net_namespace.c39
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/core/sock.c24
-rw-r--r--net/dsa/dsa.c1
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/geneve.c36
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_tunnel.c9
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv6/tcp_ipv6.c45
-rw-r--r--net/mpls/mpls_gso.c5
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netlink/af_netlink.c92
-rw-r--r--net/netlink/af_netlink.h8
-rw-r--r--net/netlink/genetlink.c56
-rw-r--r--net/nonet.c26
-rw-r--r--net/openvswitch/actions.c3
-rw-r--r--net/openvswitch/datapath.c3
-rw-r--r--net/openvswitch/flow.c5
-rw-r--r--net/openvswitch/flow_netlink.c13
-rw-r--r--net/openvswitch/vport-geneve.c3
-rw-r--r--net/openvswitch/vport-gre.c18
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/openvswitch/vport.c7
-rw-r--r--net/packet/af_packet.c11
-rw-r--r--net/rds/message.c3
-rw-r--r--net/rfkill/rfkill-gpio.c1
-rw-r--r--net/rfkill/rfkill-regulator.c1
-rw-r--r--net/socket.c20
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/cache.c26
-rw-r--r--net/sunrpc/svc.c42
-rw-r--r--net/sunrpc/svc_xprt.c292
-rw-r--r--net/sunrpc/svcsock.c5
-rw-r--r--net/sunrpc/xdr.c9
57 files changed, 921 insertions, 571 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 376805005cc7..118956448cf6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -579,11 +579,12 @@ static int vlan_dev_init(struct net_device *dev)
(1<<__LINK_STATE_PRESENT);
dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
- NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
+ NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
NETIF_F_ALL_FCOE;
- dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
+ dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
+ NETIF_F_GSO_SOFTWARE;
dev->gso_max_size = real_dev->gso_max_size;
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
@@ -648,7 +649,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
features |= NETIF_F_RXCSUM;
features = netdev_intersect_features(features, real_dev->features);
- features |= old_features & NETIF_F_SOFT_FEATURES;
+ features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
features |= NETIF_F_LLTX;
return features;
diff --git a/net/Makefile b/net/Makefile
index 95fc694e4ddc..38704bdf941a 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -5,8 +5,6 @@
# Rewritten to use lists instead of if-statements.
#
-obj-y := nonet.o
-
obj-$(CONFIG_NET) := socket.o core/
tmp-$(CONFIG_COMPAT) := compat.o
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index fc1835c6bb40..00f9e144cc97 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -251,7 +251,7 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
kfree(entry);
/* Make room for the rest of the fragments. */
- if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
+ if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
kfree_skb(skb_out);
skb_out = NULL;
goto free;
@@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
*/
mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
- max_fragment_size = (mtu - header_size - ETH_HLEN);
+ max_fragment_size = mtu - header_size;
max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
/* Don't even try to fragment, if we need more than 16 fragments */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 90cff585b37d..e0bcf9e84273 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -810,7 +810,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
goto out;
gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
- if (!gw_node->bandwidth_down == 0)
+ if (!gw_node)
goto out;
switch (atomic_read(&bat_priv->gw_mode)) {
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 76617be1e797..c989253737f0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -390,7 +390,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
drop:
dev->stats.rx_dropped++;
- kfree_skb(skb);
return NET_RX_DROP;
}
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 85bcc21e84d2..ce82722d049b 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -533,6 +533,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
BT_DBG("");
+ if (!l2cap_is_socket(sock))
+ return -EBADFD;
+
baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst);
baswap((void *) src, &l2cap_pi(sock->sk)->chan->src);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 67fe5e84e68f..278a194e6af4 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -334,6 +334,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
BT_DBG("");
+ if (!l2cap_is_socket(sock))
+ return -EBADFD;
+
session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
if (!session)
return -ENOMEM;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 79d84b88b8f0..fe18825cc8a4 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -661,7 +661,7 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
memset(&cp, 0, sizeof(cp));
/* Update random address, but set require_privacy to false so
- * that we never connect with an unresolvable address.
+ * that we never connect with an non-resolvable address.
*/
if (hci_update_random_address(req, false, &own_addr_type))
return;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 93f92a085506..5dcacf9607e4 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1373,8 +1373,6 @@ static void hci_init1_req(struct hci_request *req, unsigned long opt)
static void bredr_setup(struct hci_request *req)
{
- struct hci_dev *hdev = req->hdev;
-
__le16 param;
__u8 flt_type;
@@ -1403,14 +1401,6 @@ static void bredr_setup(struct hci_request *req)
/* Connection accept timeout ~20 secs */
param = cpu_to_le16(0x7d00);
hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
- /* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2,
- * but it does not support page scan related HCI commands.
- */
- if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) {
- hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
- hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
- }
}
static void le_setup(struct hci_request *req)
@@ -1718,6 +1708,16 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
if (hdev->commands[5] & 0x10)
hci_setup_link_policy(req);
+ if (hdev->commands[8] & 0x01)
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+
+ /* Some older Broadcom based Bluetooth 1.2 controllers do not
+ * support the Read Page Scan Type command. Check support for
+ * this command in the bit mask of supported commands.
+ */
+ if (hdev->commands[13] & 0x01)
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+
if (lmp_le_capable(hdev)) {
u8 events[8];
@@ -2634,6 +2634,12 @@ static int hci_dev_do_close(struct hci_dev *hdev)
drain_workqueue(hdev->workqueue);
hci_dev_lock(hdev);
+
+ if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+ if (hdev->dev_type == HCI_BREDR)
+ mgmt_powered(hdev, 0);
+ }
+
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
hci_conn_hash_flush(hdev);
@@ -2681,14 +2687,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hdev->flags &= BIT(HCI_RAW);
hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
- if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
- if (hdev->dev_type == HCI_BREDR) {
- hci_dev_lock(hdev);
- mgmt_powered(hdev, 0);
- hci_dev_unlock(hdev);
- }
- }
-
/* Controller radio is available but is currently powered down */
hdev->amp_status = AMP_STATUS_POWERED_DOWN;
@@ -3083,7 +3081,9 @@ static void hci_power_on(struct work_struct *work)
err = hci_dev_do_open(hdev);
if (err < 0) {
+ hci_dev_lock(hdev);
mgmt_set_powered_failed(hdev, err);
+ hci_dev_unlock(hdev);
return;
}
@@ -3959,17 +3959,29 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
}
/* In case of required privacy without resolvable private address,
- * use an unresolvable private address. This is useful for active
+ * use an non-resolvable private address. This is useful for active
* scanning and non-connectable advertising.
*/
if (require_privacy) {
- bdaddr_t urpa;
+ bdaddr_t nrpa;
+
+ while (true) {
+ /* The non-resolvable private address is generated
+ * from random six bytes with the two most significant
+ * bits cleared.
+ */
+ get_random_bytes(&nrpa, 6);
+ nrpa.b[5] &= 0x3f;
- get_random_bytes(&urpa, 6);
- urpa.b[5] &= 0x3f; /* Clear two most significant bits */
+ /* The non-resolvable private address shall not be
+ * equal to the public address.
+ */
+ if (bacmp(&hdev->bdaddr, &nrpa))
+ break;
+ }
*own_addr_type = ADDR_LE_DEV_RANDOM;
- set_random_addr(req, &urpa);
+ set_random_addr(req, &nrpa);
return 0;
}
@@ -5625,7 +5637,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
u8 filter_policy;
/* Set require_privacy to false since no SCAN_REQ are send
- * during passive scanning. Not using an unresolvable address
+ * during passive scanning. Not using an non-resolvable address
* here is important so that peer devices using direct
* advertising with our address will be correctly reported
* by the controller.
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 322abbbbcef9..3f2e8b830cbd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -242,7 +242,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
if (rp->status)
return;
- if (test_bit(HCI_SETUP, &hdev->dev_flags))
+ if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags))
memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH);
}
@@ -257,6 +258,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (!sent)
return;
+ hci_dev_lock(hdev);
+
if (!status) {
__u8 param = *((__u8 *) sent);
@@ -268,6 +271,8 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_auth_enable_complete(hdev, status);
+
+ hci_dev_unlock(hdev);
}
static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -443,6 +448,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
if (!sent)
return;
+ hci_dev_lock(hdev);
+
if (!status) {
if (sent->mode)
hdev->features[1][0] |= LMP_HOST_SSP;
@@ -458,6 +465,8 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
else
clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
}
+
+ hci_dev_unlock(hdev);
}
static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
@@ -471,6 +480,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
if (!sent)
return;
+ hci_dev_lock(hdev);
+
if (!status) {
if (sent->support)
hdev->features[1][0] |= LMP_HOST_SC;
@@ -486,6 +497,8 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
else
clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
}
+
+ hci_dev_unlock(hdev);
}
static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
@@ -497,7 +510,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
if (rp->status)
return;
- if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags)) {
hdev->hci_ver = rp->hci_ver;
hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
hdev->lmp_ver = rp->lmp_ver;
@@ -516,7 +530,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
if (rp->status)
return;
- if (test_bit(HCI_SETUP, &hdev->dev_flags))
+ if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+ test_bit(HCI_CONFIG, &hdev->dev_flags))
memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
}
@@ -1135,6 +1150,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
if (!cp)
return;
+ hci_dev_lock(hdev);
+
switch (cp->enable) {
case LE_SCAN_ENABLE:
set_bit(HCI_LE_SCAN, &hdev->dev_flags);
@@ -1184,6 +1201,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
break;
}
+
+ hci_dev_unlock(hdev);
}
static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
@@ -1278,6 +1297,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
if (!sent)
return;
+ hci_dev_lock(hdev);
+
if (sent->le) {
hdev->features[1][0] |= LMP_HOST_LE;
set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
@@ -1291,6 +1312,8 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
hdev->features[1][0] |= LMP_HOST_LE_BREDR;
else
hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
+
+ hci_dev_unlock(hdev);
}
static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -2174,7 +2197,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
+ /* Require HCI_CONNECTABLE or a whitelist entry to accept the
+ * connection. These features are only touched through mgmt so
+ * only do the checks if HCI_MGMT is set.
+ */
+ if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
+ !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
!hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index cc25d0b74b36..07348e142f16 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1314,13 +1314,14 @@ int hidp_connection_add(struct hidp_connadd_req *req,
{
struct hidp_session *session;
struct l2cap_conn *conn;
- struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan;
+ struct l2cap_chan *chan;
int ret;
ret = hidp_verify_sockets(ctrl_sock, intr_sock);
if (ret)
return ret;
+ chan = l2cap_pi(ctrl_sock->sk)->chan;
conn = NULL;
l2cap_chan_lock(chan);
if (chan->conn)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a2b6dfa38a0c..d04dc0095736 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6966,8 +6966,9 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags))
conn->local_fixed_chan |= L2CAP_FC_A2MP;
- if (bredr_sc_enabled(hcon->hdev) &&
- test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
+ if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) &&
+ (bredr_sc_enabled(hcon->hdev) ||
+ test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags)))
conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
mutex_init(&conn->ident_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7384f1161336..693ce8bcd06e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2199,12 +2199,14 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
+ hci_dev_lock(hdev);
+
if (status) {
u8 mgmt_err = mgmt_status(status);
mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
&mgmt_err);
- return;
+ goto unlock;
}
mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
@@ -2222,17 +2224,16 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status)
if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
struct hci_request req;
- hci_dev_lock(hdev);
-
hci_req_init(&req, hdev);
update_adv_data(&req);
update_scan_rsp_data(&req);
hci_req_run(&req, NULL);
hci_update_background_scan(hdev);
-
- hci_dev_unlock(hdev);
}
+
+unlock:
+ hci_dev_unlock(hdev);
}
static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
@@ -3114,14 +3115,13 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
conn->disconn_cfm_cb = NULL;
hci_conn_drop(conn);
- hci_conn_put(conn);
-
- mgmt_pending_remove(cmd);
/* The device is paired so there is no need to remove
* its connection parameters anymore.
*/
clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
+
+ hci_conn_put(conn);
}
void mgmt_smp_complete(struct hci_conn *conn, bool complete)
@@ -3130,8 +3130,10 @@ void mgmt_smp_complete(struct hci_conn *conn, bool complete)
struct pending_cmd *cmd;
cmd = find_pairing(conn);
- if (cmd)
+ if (cmd) {
cmd->cmd_complete(cmd, status);
+ mgmt_pending_remove(cmd);
+ }
}
static void pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3141,10 +3143,13 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status)
BT_DBG("status %u", status);
cmd = find_pairing(conn);
- if (!cmd)
+ if (!cmd) {
BT_DBG("Unable to find a pending command");
- else
- cmd->cmd_complete(cmd, mgmt_status(status));
+ return;
+ }
+
+ cmd->cmd_complete(cmd, mgmt_status(status));
+ mgmt_pending_remove(cmd);
}
static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3157,10 +3162,13 @@ static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
return;
cmd = find_pairing(conn);
- if (!cmd)
+ if (!cmd) {
BT_DBG("Unable to find a pending command");
- else
- cmd->cmd_complete(cmd, mgmt_status(status));
+ return;
+ }
+
+ cmd->cmd_complete(cmd, mgmt_status(status));
+ mgmt_pending_remove(cmd);
}
static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3274,8 +3282,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
cmd->user_data = hci_conn_get(conn);
if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) &&
- hci_conn_security(conn, sec_level, auth_type, true))
- pairing_complete(cmd, 0);
+ hci_conn_security(conn, sec_level, auth_type, true)) {
+ cmd->cmd_complete(cmd, 0);
+ mgmt_pending_remove(cmd);
+ }
err = 0;
@@ -3317,7 +3327,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- pairing_complete(cmd, MGMT_STATUS_CANCELLED);
+ cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED);
+ mgmt_pending_remove(cmd);
err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0,
addr, sizeof(*addr));
@@ -3791,7 +3802,7 @@ static bool trigger_discovery(struct hci_request *req, u8 *status)
/* All active scans will be done with either a resolvable
* private address (when privacy feature has been enabled)
- * or unresolvable private address.
+ * or non-resolvable private address.
*/
err = hci_update_random_address(req, true, &own_addr_type);
if (err < 0) {
@@ -4279,12 +4290,14 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
+ hci_dev_lock(hdev);
+
if (status) {
u8 mgmt_err = mgmt_status(status);
mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
cmd_status_rsp, &mgmt_err);
- return;
+ goto unlock;
}
if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
@@ -4299,6 +4312,9 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status)
if (match.sk)
sock_put(match.sk);
+
+unlock:
+ hci_dev_unlock(hdev);
}
static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -6081,6 +6097,11 @@ static int powered_update_hci(struct hci_dev *hdev)
hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
}
+ if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
+ u8 sc = 0x01;
+ hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc);
+ }
+
if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
lmp_bredr_capable(hdev)) {
struct hci_cp_write_le_host_supported cp;
@@ -6130,8 +6151,7 @@ static int powered_update_hci(struct hci_dev *hdev)
int mgmt_powered(struct hci_dev *hdev, u8 powered)
{
struct cmd_lookup match = { NULL, hdev };
- u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
- u8 zero_cod[] = { 0, 0, 0 };
+ u8 status, zero_cod[] = { 0, 0, 0 };
int err;
if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -6147,7 +6167,20 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
}
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status_not_powered);
+
+ /* If the power off is because of hdev unregistration let
+ * use the appropriate INVALID_INDEX status. Otherwise use
+ * NOT_POWERED. We cover both scenarios here since later in
+ * mgmt_index_removed() any hci_conn callbacks will have already
+ * been triggered, potentially causing misleading DISCONNECTED
+ * status responses.
+ */
+ if (test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+ status = MGMT_STATUS_INVALID_INDEX;
+ else
+ status = MGMT_STATUS_NOT_POWERED;
+
+ mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -6681,8 +6714,10 @@ void mgmt_auth_failed(struct hci_conn *conn, u8 hci_status)
mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev),
cmd ? cmd->sk : NULL);
- if (cmd)
- pairing_complete(cmd, status);
+ if (cmd) {
+ cmd->cmd_complete(cmd, status);
+ mgmt_pending_remove(cmd);
+ }
}
void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
@@ -7046,13 +7081,15 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
* kept and checking possible scan response data
* will be skipped.
*/
- if (hdev->discovery.uuid_count > 0) {
+ if (hdev->discovery.uuid_count > 0)
match = eir_has_uuids(eir, eir_len,
hdev->discovery.uuid_count,
hdev->discovery.uuids);
- if (!match)
- return;
- }
+ else
+ match = true;
+
+ if (!match && !scan_rsp_len)
+ return;
/* Copy EIR or advertising data into event */
memcpy(ev->eir, eir, eir_len);
@@ -7061,8 +7098,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
* provided, results with empty EIR or advertising data
* should be dropped since they do not match any UUID.
*/
- if (hdev->discovery.uuid_count > 0)
+ if (hdev->discovery.uuid_count > 0 && !scan_rsp_len)
return;
+
+ match = false;
}
if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV))
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6a46252fe66f..b67749bb55bf 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1673,7 +1673,8 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
/* SMP over BR/EDR requires special treatment */
if (conn->hcon->type == ACL_LINK) {
/* We must have a BR/EDR SC link */
- if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags))
+ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) &&
+ !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags))
return SMP_CROSS_TRANSP_NOT_ALLOWED;
set_bit(SMP_FLAG_SC, &smp->flags);
@@ -2927,7 +2928,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm_aes)) {
BT_ERR("Unable to create crypto context");
- return ERR_PTR(PTR_ERR(tfm_aes));
+ return ERR_CAST(tfm_aes);
}
create_chan:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 7e38b729696a..15845814a0f2 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/messenger.h>
#include "crypto.h"
#include "auth_x.h"
@@ -293,6 +294,11 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout("build_authorizer for %s %p\n",
ceph_entity_type_name(th->service), au);
+ ceph_crypto_key_destroy(&au->session_key);
+ ret = ceph_crypto_key_clone(&au->session_key, &th->session_key);
+ if (ret)
+ return ret;
+
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
ceph_x_encrypt_buflen(ticket_blob_len);
dout(" need len %d\n", maxlen);
@@ -302,8 +308,10 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
}
if (!au->buf) {
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
- if (!au->buf)
+ if (!au->buf) {
+ ceph_crypto_key_destroy(&au->session_key);
return -ENOMEM;
+ }
}
au->service = th->service;
au->secret_id = th->secret_id;
@@ -329,7 +337,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
get_random_bytes(&au->nonce, sizeof(au->nonce));
msg_b.struct_v = 1;
msg_b.nonce = cpu_to_le64(au->nonce);
- ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
+ ret = ceph_x_encrypt(&au->session_key, &msg_b, sizeof(msg_b),
p, end - p);
if (ret < 0)
goto out_buf;
@@ -560,6 +568,8 @@ static int ceph_x_create_authorizer(
auth->authorizer_buf_len = au->buf->vec.iov_len;
auth->authorizer_reply_buf = au->reply_buf;
auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+ auth->sign_message = ac->ops->sign_message;
+ auth->check_message_signature = ac->ops->check_message_signature;
return 0;
}
@@ -588,17 +598,13 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a, size_t len)
{
struct ceph_x_authorizer *au = (void *)a;
- struct ceph_x_ticket_handler *th;
int ret = 0;
struct ceph_x_authorize_reply reply;
void *preply = &reply;
void *p = au->reply_buf;
void *end = p + sizeof(au->reply_buf);
- th = get_ticket_handler(ac, au->service);
- if (IS_ERR(th))
- return PTR_ERR(th);
- ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply));
+ ret = ceph_x_decrypt(&au->session_key, &p, end, &preply, sizeof(reply));
if (ret < 0)
return ret;
if (ret != sizeof(reply))
@@ -618,6 +624,7 @@ static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
{
struct ceph_x_authorizer *au = (void *)a;
+ ceph_crypto_key_destroy(&au->session_key);
ceph_buffer_put(au->buf);
kfree(au);
}
@@ -663,6 +670,59 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
memset(&th->validity, 0, sizeof(th->validity));
}
+static int calcu_signature(struct ceph_x_authorizer *au,
+ struct ceph_msg *msg, __le64 *sig)
+{
+ int ret;
+ char tmp_enc[40];
+ __le32 tmp[5] = {
+ 16u, msg->hdr.crc, msg->footer.front_crc,
+ msg->footer.middle_crc, msg->footer.data_crc,
+ };
+ ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp),
+ tmp_enc, sizeof(tmp_enc));
+ if (ret < 0)
+ return ret;
+ *sig = *(__le64*)(tmp_enc + 4);
+ return 0;
+}
+
+static int ceph_x_sign_message(struct ceph_auth_handshake *auth,
+ struct ceph_msg *msg)
+{
+ int ret;
+ if (!auth->authorizer)
+ return 0;
+ ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &msg->footer.sig);
+ if (ret < 0)
+ return ret;
+ msg->footer.flags |= CEPH_MSG_FOOTER_SIGNED;
+ return 0;
+}
+
+static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
+ struct ceph_msg *msg)
+{
+ __le64 sig_check;
+ int ret;
+
+ if (!auth->authorizer)
+ return 0;
+ ret = calcu_signature((struct ceph_x_authorizer *)auth->authorizer,
+ msg, &sig_check);
+ if (ret < 0)
+ return ret;
+ if (sig_check == msg->footer.sig)
+ return 0;
+ if (msg->footer.flags & CEPH_MSG_FOOTER_SIGNED)
+ dout("ceph_x_check_message_signature %p has signature %llx "
+ "expect %llx\n", msg, msg->footer.sig, sig_check);
+ else
+ dout("ceph_x_check_message_signature %p sender did not set "
+ "CEPH_MSG_FOOTER_SIGNED\n", msg);
+ return -EBADMSG;
+}
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
@@ -677,6 +737,8 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
.destroy = ceph_x_destroy,
+ .sign_message = ceph_x_sign_message,
+ .check_message_signature = ceph_x_check_message_signature,
};
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 65ee72082d99..e8b7c6917d47 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -26,6 +26,7 @@ struct ceph_x_ticket_handler {
struct ceph_x_authorizer {
+ struct ceph_crypto_key session_key;
struct ceph_buffer *buf;
unsigned int service;
u64 nonce;
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 621b5f65407f..add5f921a0ff 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -6,7 +6,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kv{malloc,free} */
+#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -35,7 +35,7 @@ void ceph_buffer_release(struct kref *kref)
struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
dout("buffer_release %p\n", b);
- ceph_kvfree(b->vec.iov_base);
+ kvfree(b->vec.iov_base);
kfree(b);
}
EXPORT_SYMBOL(ceph_buffer_release);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 58fbfe134f93..5d5ab67f516d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -184,14 +184,6 @@ void *ceph_kvmalloc(size_t size, gfp_t flags)
return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
}
-void ceph_kvfree(const void *ptr)
-{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
-}
-
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
{
@@ -245,6 +237,8 @@ enum {
Opt_noshare,
Opt_crc,
Opt_nocrc,
+ Opt_cephx_require_signatures,
+ Opt_nocephx_require_signatures,
};
static match_table_t opt_tokens = {
@@ -263,6 +257,8 @@ static match_table_t opt_tokens = {
{Opt_noshare, "noshare"},
{Opt_crc, "crc"},
{Opt_nocrc, "nocrc"},
+ {Opt_cephx_require_signatures, "cephx_require_signatures"},
+ {Opt_nocephx_require_signatures, "nocephx_require_signatures"},
{-1, NULL}
};
@@ -461,6 +457,12 @@ ceph_parse_options(char *options, const char *dev_name,
case Opt_nocrc:
opt->flags |= CEPH_OPT_NOCRC;
break;
+ case Opt_cephx_require_signatures:
+ opt->flags &= ~CEPH_OPT_NOMSGAUTH;
+ break;
+ case Opt_nocephx_require_signatures:
+ opt->flags |= CEPH_OPT_NOMSGAUTH;
+ break;
default:
BUG_ON(token);
@@ -504,6 +506,9 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
init_waitqueue_head(&client->auth_wq);
client->auth_err = 0;
+ if (!ceph_test_opt(client, NOMSGAUTH))
+ required_features |= CEPH_FEATURE_MSG_AUTH;
+
client->extra_mon_dispatch = NULL;
client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
supported_features;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8d1653caffdb..33a2f201e460 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1196,8 +1196,18 @@ static void prepare_write_message_footer(struct ceph_connection *con)
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
- con->out_kvec[v].iov_len = sizeof(m->footer);
- con->out_kvec_bytes += sizeof(m->footer);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
+ if (con->ops->sign_message)
+ con->ops->sign_message(con, m);
+ else
+ m->footer.sig = 0;
+ con->out_kvec[v].iov_len = sizeof(m->footer);
+ con->out_kvec_bytes += sizeof(m->footer);
+ } else {
+ m->old_footer.flags = m->footer.flags;
+ con->out_kvec[v].iov_len = sizeof(m->old_footer);
+ con->out_kvec_bytes += sizeof(m->old_footer);
+ }
con->out_kvec_left++;
con->out_more = m->more_to_follow;
con->out_msg_done = true;
@@ -2249,6 +2259,7 @@ static int read_partial_message(struct ceph_connection *con)
int ret;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
+ bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
u64 seq;
u32 crc;
@@ -2361,12 +2372,21 @@ static int read_partial_message(struct ceph_connection *con)
}
/* footer */
- size = sizeof (m->footer);
+ if (need_sign)
+ size = sizeof(m->footer);
+ else
+ size = sizeof(m->old_footer);
+
end += size;
ret = read_partial(con, end, size, &m->footer);
if (ret <= 0)
return ret;
+ if (!need_sign) {
+ m->footer.flags = m->old_footer.flags;
+ m->footer.sig = 0;
+ }
+
dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
m, front_len, m->footer.front_crc, middle_len,
m->footer.middle_crc, data_len, m->footer.data_crc);
@@ -2390,6 +2410,12 @@ static int read_partial_message(struct ceph_connection *con)
return -EBADMSG;
}
+ if (need_sign && con->ops->check_message_signature &&
+ con->ops->check_message_signature(con, m)) {
+ pr_err("read_partial_message %p signature check failed\n", m);
+ return -EBADMSG;
+ }
+
return 1; /* done! */
}
@@ -3288,7 +3314,7 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
static void ceph_msg_free(struct ceph_msg *m)
{
dout("%s %p\n", __func__, m);
- ceph_kvfree(m->front.iov_base);
+ kvfree(m->front.iov_base);
kmem_cache_free(ceph_msg_cache, m);
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6f164289bde8..53299c7b0ca4 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -292,6 +292,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
ceph_osd_data_release(&op->cls.request_data);
ceph_osd_data_release(&op->cls.response_data);
break;
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ ceph_osd_data_release(&op->xattr.osd_data);
+ break;
default:
break;
}
@@ -476,8 +480,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
size_t payload_len = 0;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
- opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE);
op->extent.offset = offset;
op->extent.length = length;
@@ -545,6 +548,39 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
}
EXPORT_SYMBOL(osd_req_op_cls_init);
+int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
+ u16 opcode, const char *name, const void *value,
+ size_t size, u8 cmp_op, u8 cmp_mode)
+{
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
+ struct ceph_pagelist *pagelist;
+ size_t payload_len;
+
+ BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
+
+ pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+ if (!pagelist)
+ return -ENOMEM;
+
+ ceph_pagelist_init(pagelist);
+
+ payload_len = strlen(name);
+ op->xattr.name_len = payload_len;
+ ceph_pagelist_append(pagelist, name, payload_len);
+
+ op->xattr.value_len = size;
+ ceph_pagelist_append(pagelist, value, size);
+ payload_len += size;
+
+ op->xattr.cmp_op = cmp_op;
+ op->xattr.cmp_mode = cmp_mode;
+
+ ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
+ op->payload_len = payload_len;
+ return 0;
+}
+EXPORT_SYMBOL(osd_req_op_xattr_init);
+
void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag)
@@ -626,7 +662,6 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_ZERO:
- case CEPH_OSD_OP_DELETE:
case CEPH_OSD_OP_TRUNCATE:
if (src->op == CEPH_OSD_OP_WRITE)
request_data_len = src->extent.length;
@@ -676,6 +711,19 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
dst->alloc_hint.expected_write_size =
cpu_to_le64(src->alloc_hint.expected_write_size);
break;
+ case CEPH_OSD_OP_SETXATTR:
+ case CEPH_OSD_OP_CMPXATTR:
+ dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
+ dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
+ dst->xattr.cmp_op = src->xattr.cmp_op;
+ dst->xattr.cmp_mode = src->xattr.cmp_mode;
+ osd_data = &src->xattr.osd_data;
+ ceph_osdc_msg_data_add(req->r_request, osd_data);
+ request_data_len = osd_data->pagelist->length;
+ break;
+ case CEPH_OSD_OP_CREATE:
+ case CEPH_OSD_OP_DELETE:
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
@@ -705,7 +753,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
struct ceph_vino vino,
- u64 off, u64 *plen, int num_ops,
+ u64 off, u64 *plen,
+ unsigned int which, int num_ops,
int opcode, int flags,
struct ceph_snap_context *snapc,
u32 truncate_seq,
@@ -716,13 +765,11 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u64 objnum = 0;
u64 objoff = 0;
u64 objlen = 0;
- u32 object_size;
- u64 object_base;
int r;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
- opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -738,29 +785,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
return ERR_PTR(r);
}
- object_size = le32_to_cpu(layout->fl_object_size);
- object_base = off - objoff;
- if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
- if (truncate_size <= object_base) {
- truncate_size = 0;
- } else {
- truncate_size -= object_base;
- if (truncate_size > object_size)
- truncate_size = object_size;
+ if (opcode == CEPH_OSD_OP_CREATE || opcode == CEPH_OSD_OP_DELETE) {
+ osd_req_op_init(req, which, opcode);
+ } else {
+ u32 object_size = le32_to_cpu(layout->fl_object_size);
+ u32 object_base = off - objoff;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
}
+ osd_req_op_extent_init(req, which, opcode, objoff, objlen,
+ truncate_size, truncate_seq);
}
- osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
- truncate_size, truncate_seq);
-
- /*
- * A second op in the ops array means the caller wants to
- * also issue a include a 'startsync' command so that the
- * osd will flush data quickly.
- */
- if (num_ops > 1)
- osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
-
req->r_base_oloc.pool = ceph_file_layout_pg_pool(*layout);
snprintf(req->r_base_oid.name, sizeof(req->r_base_oid.name),
@@ -2626,7 +2668,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
vino.snap, off, *plen);
- req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
+ req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 0, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, truncate_seq, truncate_size,
false);
@@ -2669,7 +2711,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
int page_align = off & ~PAGE_MASK;
BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
- req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
+ req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
@@ -2920,6 +2962,20 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_auth_handshake *auth = &o->o_auth;
+ return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_auth_handshake *auth = &o->o_auth;
+ return ceph_auth_check_message_signature(auth, msg);
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = get_osd_con,
.put = put_osd_con,
@@ -2928,5 +2984,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
+ .sign_message = sign_message,
+ .check_message_signature = check_message_signature,
.fault = osd_reset,
};
diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28d0a66..683d493aa1bf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,6 +1694,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_scrub_packet(skb, true);
skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return 0;
}
@@ -2522,7 +2523,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
@@ -2562,7 +2563,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
- const struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
__be16 protocol = skb->protocol;
@@ -2570,11 +2571,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (!vlan_tx_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+ goto finalize;
+ }
}
features = netdev_intersect_features(features,
@@ -2591,6 +2602,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+finalize:
+ if (dev->netdev_ops->ndo_features_check)
+ features &= dev->netdev_ops->ndo_features_check(skb, dev,
+ features);
+
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
@@ -2661,19 +2677,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
-
if (netif_needs_gso(dev, skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs)) {
- segs = NULL;
+ goto out_kfree_skb;
} else if (segs) {
consume_skb(skb);
skb = segs;
@@ -4557,6 +4566,68 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ void *have;
+ int work, weight;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
+ goto out_unlock;
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(napi_disable_pending(n))) {
+ napi_complete(n);
+ goto out_unlock;
+ }
+
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ napi_gro_flush(n, HZ >= 1000);
+ }
+
+ /* Some drivers may have called napi_schedule
+ * prior to exhausting their budget.
+ */
+ if (unlikely(!list_empty(&n->poll_list))) {
+ pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+ n->dev ? n->dev->name : "backlog");
+ goto out_unlock;
+ }
+
+ list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+ netpoll_poll_unlock(have);
+
+ return work;
+}
+
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4564,74 +4635,34 @@ static void net_rx_action(struct softirq_action *h)
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
- void *have;
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
- while (!list_empty(&list)) {
+ for (;;) {
struct napi_struct *n;
- int work, weight;
-
- /* If softirq window is exhausted then punt.
- * Allow this to run for 2 jiffies since which will allow
- * an average latency of 1.5/HZ.
- */
- if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
- goto softnet_break;
-
-
- n = list_first_entry(&list, struct napi_struct, poll_list);
- list_del_init(&n->poll_list);
- have = netpoll_poll_lock(n);
-
- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- work = n->poll(n, weight);
- trace_napi_poll(n);
+ if (list_empty(&list)) {
+ if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+ return;
+ break;
}
- WARN_ON_ONCE(work > weight);
-
- budget -= work;
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ budget -= napi_poll(n, &repoll);
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
+ /* If softirq window is exhausted then punt.
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(work == weight)) {
- if (unlikely(napi_disable_pending(n))) {
- napi_complete(n);
- } else {
- if (n->gro_list) {
- /* flush too old packets
- * If HZ < 1000, flush all packets.
- */
- napi_gro_flush(n, HZ >= 1000);
- }
- list_add_tail(&n->poll_list, &repoll);
- }
+ if (unlikely(budget <= 0 ||
+ time_after_eq(jiffies, time_limit))) {
+ sd->time_squeeze++;
+ break;
}
-
- netpoll_poll_unlock(have);
}
- if (!sd_has_rps_ipi_waiting(sd) &&
- list_empty(&list) &&
- list_empty(&repoll))
- return;
-out:
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -4641,12 +4672,6 @@ out:
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
-
- return;
-
-softnet_break:
- sd->time_squeeze++;
- goto out;
}
struct netdev_adjacent {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7f155175bba8..ce780c722e48 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -337,17 +337,17 @@ EXPORT_SYMBOL_GPL(__put_net);
struct net *get_net_ns_by_fd(int fd)
{
- struct proc_ns *ei;
struct file *file;
+ struct ns_common *ns;
struct net *net;
file = proc_ns_fget(fd);
if (IS_ERR(file))
return ERR_CAST(file);
- ei = get_proc_ns(file_inode(file));
- if (ei->ns_ops == &netns_operations)
- net = get_net(ei->ns);
+ ns = get_proc_ns(file_inode(file));
+ if (ns->ops == &netns_operations)
+ net = get_net(container_of(ns, struct net, ns));
else
net = ERR_PTR(-EINVAL);
@@ -386,12 +386,15 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
static __net_init int net_ns_net_init(struct net *net)
{
- return proc_alloc_inum(&net->proc_inum);
+#ifdef CONFIG_NET_NS
+ net->ns.ops = &netns_operations;
+#endif
+ return ns_alloc_inum(&net->ns);
}
static __net_exit void net_ns_net_exit(struct net *net)
{
- proc_free_inum(net->proc_inum);
+ ns_free_inum(&net->ns);
}
static struct pernet_operations __net_initdata net_ns_ops = {
@@ -629,7 +632,7 @@ void unregister_pernet_device(struct pernet_operations *ops)
EXPORT_SYMBOL_GPL(unregister_pernet_device);
#ifdef CONFIG_NET_NS
-static void *netns_get(struct task_struct *task)
+static struct ns_common *netns_get(struct task_struct *task)
{
struct net *net = NULL;
struct nsproxy *nsproxy;
@@ -640,17 +643,22 @@ static void *netns_get(struct task_struct *task)
net = get_net(nsproxy->net_ns);
task_unlock(task);
- return net;
+ return net ? &net->ns : NULL;
}
-static void netns_put(void *ns)
+static inline struct net *to_net_ns(struct ns_common *ns)
{
- put_net(ns);
+ return container_of(ns, struct net, ns);
}
-static int netns_install(struct nsproxy *nsproxy, void *ns)
+static void netns_put(struct ns_common *ns)
{
- struct net *net = ns;
+ put_net(to_net_ns(ns));
+}
+
+static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
+{
+ struct net *net = to_net_ns(ns);
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
@@ -661,18 +669,11 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
-static unsigned int netns_inum(void *ns)
-{
- struct net *net = ns;
- return net->proc_inum;
-}
-
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
- .inum = netns_inum,
};
#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d06107d36ec8..9cf6fe9ddc0c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2368,6 +2368,11 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
return err;
}
+ if (vid) {
+ pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+ return err;
+ }
+
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
err = dev_uc_add_excl(dev, addr);
else if (is_multicast_ether_addr(addr))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ae13ef6b3ea7..395c15b82087 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4148,6 +4148,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
+ skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
nf_reset_trace(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 9a56b2000c3f..1c7a33db1314 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1731,18 +1731,34 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
}
EXPORT_SYMBOL(sock_kmalloc);
-/*
- * Free an option memory block.
+/* Free an option memory block. Note, we actually want the inline
+ * here as this allows gcc to detect the nullify and fold away the
+ * condition entirely.
*/
-void sock_kfree_s(struct sock *sk, void *mem, int size)
+static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
+ const bool nullify)
{
if (WARN_ON_ONCE(!mem))
return;
- kfree(mem);
+ if (nullify)
+ kzfree(mem);
+ else
+ kfree(mem);
atomic_sub(size, &sk->sk_omem_alloc);
}
+
+void sock_kfree_s(struct sock *sk, void *mem, int size)
+{
+ __sock_kfree_s(sk, mem, size, false);
+}
EXPORT_SYMBOL(sock_kfree_s);
+void sock_kzfree_s(struct sock *sk, void *mem, int size)
+{
+ __sock_kfree_s(sk, mem, size, true);
+}
+EXPORT_SYMBOL(sock_kzfree_s);
+
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
I think, these locks should be removed for datagram sockets.
*/
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 322c778487e7..37317149f918 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -879,7 +879,6 @@ static struct platform_driver dsa_driver = {
.shutdown = dsa_shutdown,
.driver = {
.name = "dsa",
- .owner = THIS_MODULE,
.of_match_table = dsa_of_match_table,
.pm = &dsa_pm_ops,
},
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e9cb2588e416..18bcaf2ff2fd 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1143,8 +1143,9 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
put_child(tp, cindex, (struct rt_trie_node *)tn);
} else {
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
- tp = tn;
}
+
+ tp = tn;
}
if (tp && tp->pos + tp->bits > 32)
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index a457232f0131..394a200f93c1 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -122,14 +122,18 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int err;
skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
err = skb_cow_head(skb, min_headroom);
- if (unlikely(err))
+ if (unlikely(err)) {
+ kfree_skb(skb);
return err;
+ }
skb = vlan_hwaccel_push_inside(skb);
if (unlikely(!skb))
@@ -159,6 +163,15 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs)
}
}
+static void geneve_notify_del_rx_port(struct geneve_sock *gs)
+{
+ struct sock *sk = gs->sock->sk;
+ sa_family_t sa_family = sk->sk_family;
+
+ if (sa_family == AF_INET)
+ udp_del_offload(&gs->udp_offloads);
+}
+
/* Callback from net/ipv4/udp.c to receive packets */
static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
@@ -287,6 +300,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
geneve_rcv_t *rcv, void *data,
bool no_share, bool ipv6)
{
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
gs = geneve_socket_create(net, port, rcv, data, ipv6);
@@ -296,15 +310,15 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
if (no_share) /* Return error if sharing is not allowed. */
return ERR_PTR(-EINVAL);
+ spin_lock(&gn->sock_lock);
gs = geneve_find_sock(net, port);
- if (gs) {
- if (gs->rcv == rcv)
- atomic_inc(&gs->refcnt);
- else
+ if (gs && ((gs->rcv != rcv) ||
+ !atomic_add_unless(&gs->refcnt, 1, 0)))
gs = ERR_PTR(-EBUSY);
- } else {
+ spin_unlock(&gn->sock_lock);
+
+ if (!gs)
gs = ERR_PTR(-EINVAL);
- }
return gs;
}
@@ -312,9 +326,17 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
void geneve_sock_release(struct geneve_sock *gs)
{
+ struct net *net = sock_net(gs->sock->sk);
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
+
if (!atomic_dec_and_test(&gs->refcnt))
return;
+ spin_lock(&gn->sock_lock);
+ hlist_del_rcu(&gs->hlist);
+ geneve_notify_del_rx_port(gs);
+ spin_unlock(&gn->sock_lock);
+
queue_work(geneve_wq, &gs->del_work);
}
EXPORT_SYMBOL_GPL(geneve_sock_release);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ac8491245e5b..4f4bf5b99686 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -252,10 +252,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
- skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
- if (IS_ERR(skb))
- goto out;
-
if (dev->header_ops) {
/* Need space for new headers */
if (skb_cow_head(skb, dev->needed_headroom -
@@ -268,6 +264,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
* to gre header.
*/
skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ skb_reset_mac_header(skb);
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
@@ -275,6 +272,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
+ skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+ if (IS_ERR(skb))
+ goto out;
+
__gre_xmit(skb, dev, tnl_params, skb->protocol);
return NETDEV_TX_OK;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 63e745aadab6..d3e447936720 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -514,6 +514,9 @@ const struct ip_tunnel_encap_ops __rcu *
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
unsigned int num)
{
+ if (num >= MAX_IPTUN_ENCAP_OPS)
+ return -ERANGE;
+
return !cmpxchg((const struct ip_tunnel_encap_ops **)
&iptun_encaps[num],
NULL, ops) ? 0 : -1;
@@ -525,6 +528,9 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
{
int ret;
+ if (num >= MAX_IPTUN_ENCAP_OPS)
+ return -ERANGE;
+
ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
&iptun_encaps[num],
ops, NULL) == ops) ? 0 : -1;
@@ -567,6 +573,9 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
if (t->encap.type == TUNNEL_ENCAP_NONE)
return 0;
+ if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+ return -EINVAL;
+
rcu_read_lock();
ops = rcu_dereference(iptun_encaps[t->encap.type]);
if (likely(ops && ops->build_header))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7f18262e2326..65caf8b95e17 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2019,7 +2019,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
break;
- if (tso_segs == 1) {
+ if (tso_segs == 1 || !max_segs) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
nonagle : TCP_NAGLE_PUSH))))
@@ -2032,7 +2032,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
limit = mss_now;
- if (tso_segs > 1 && !tcp_urg_mode(tp))
+ if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
min_t(unsigned int,
cwnd_quota,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5ff87805258e..9c0b54e87b47 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1387,6 +1387,28 @@ ipv6_pktoptions:
return 0;
}
+static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
+ const struct tcphdr *th)
+{
+ /* This is tricky: we move IP6CB at its correct location into
+ * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
+ * _decode_session6() uses IP6CB().
+ * barrier() makes sure compiler won't play aliasing games.
+ */
+ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+ sizeof(struct inet6_skb_parm));
+ barrier();
+
+ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+ skb->len - th->doff*4);
+ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+ TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+ TCP_SKB_CB(skb)->sacked = 0;
+}
+
static int tcp_v6_rcv(struct sk_buff *skb)
{
const struct tcphdr *th;
@@ -1418,24 +1440,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
- * barrier() makes sure compiler wont play fool^Waliasing games.
- */
- memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
- sizeof(struct inet6_skb_parm));
- barrier();
-
- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
- skb->len - th->doff*4);
- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
- TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
- TCP_SKB_CB(skb)->sacked = 0;
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- tcp_v6_iif(skb));
+ inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1451,6 +1458,8 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
+ tcp_v6_fill_cb(skb, hdr, th);
+
#ifdef CONFIG_TCP_MD5SIG
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
@@ -1482,6 +1491,8 @@ no_tcp_socket:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
+ tcp_v6_fill_cb(skb, hdr, th);
+
if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
csum_error:
TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
@@ -1505,6 +1516,8 @@ do_time_wait:
goto discard_it;
}
+ tcp_v6_fill_cb(skb, hdr, th);
+
if (skb->len < (th->doff<<2)) {
inet_twsk_put(inet_twsk(sk));
goto bad_packet;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index ca27837974fe..349295d21946 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -31,10 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_TCPV6 |
SKB_GSO_UDP |
SKB_GSO_DODGY |
- SKB_GSO_TCP_ECN |
- SKB_GSO_GRE |
- SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP)))
+ SKB_GSO_TCP_ECN)))
goto out;
/* Setup inner SKB. */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 13c2e17bbe27..cde4a6702fa3 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -463,7 +463,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
}
#ifdef CONFIG_MODULES
-static int nfnetlink_bind(int group)
+static int nfnetlink_bind(struct net *net, int group)
{
const struct nfnetlink_subsystem *ss;
int type;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ef5f77b44ec7..84ea76ca3f1f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -525,14 +525,14 @@ out:
return err;
}
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
struct page *p_start, *p_end;
/* First page is flushed through netlink_{get,set}_status */
p_start = pgvec_to_page(hdr + PAGE_SIZE);
- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+ p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
while (p_start <= p_end) {
flush_dcache_page(p_start);
p_start++;
@@ -550,9 +550,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
static void netlink_set_status(struct nl_mmap_hdr *hdr,
enum nl_mmap_status status)
{
+ smp_mb();
hdr->nm_status = status;
flush_dcache_page(pgvec_to_page(hdr));
- smp_wmb();
}
static struct nl_mmap_hdr *
@@ -714,24 +714,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
struct nl_mmap_hdr *hdr;
struct sk_buff *skb;
unsigned int maxlen;
- bool excl = true;
int err = 0, len = 0;
- /* Netlink messages are validated by the receiver before processing.
- * In order to avoid userspace changing the contents of the message
- * after validation, the socket and the ring may only be used by a
- * single process, otherwise we fall back to copying.
- */
- if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
- atomic_read(&nlk->mapped) > 1)
- excl = false;
-
mutex_lock(&nlk->pg_vec_lock);
ring = &nlk->tx_ring;
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
do {
+ unsigned int nm_len;
+
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
if (hdr == NULL) {
if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -739,35 +731,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
schedule();
continue;
}
- if (hdr->nm_len > maxlen) {
+
+ nm_len = ACCESS_ONCE(hdr->nm_len);
+ if (nm_len > maxlen) {
err = -EINVAL;
goto out;
}
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, nm_len);
- if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
- skb = alloc_skb_head(GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- sock_hold(sk);
- netlink_ring_setup_skb(skb, sk, ring, hdr);
- NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
- __skb_put(skb, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
- atomic_inc(&ring->pending);
- } else {
- skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
- if (skb == NULL) {
- err = -ENOBUFS;
- goto out;
- }
- __skb_put(skb, hdr->nm_len);
- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+ skb = alloc_skb(nm_len, GFP_KERNEL);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto out;
}
+ __skb_put(skb, nm_len);
+ memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+ netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
netlink_increment_head(ring);
@@ -813,7 +793,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
hdr->nm_pid = NETLINK_CB(skb).creds.pid;
hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
- netlink_frame_flush_dcache(hdr);
+ netlink_frame_flush_dcache(hdr, hdr->nm_len);
netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
@@ -1111,8 +1091,10 @@ static void netlink_remove(struct sock *sk)
mutex_unlock(&nl_sk_hash_lock);
netlink_table_grab();
- if (nlk_sk(sk)->subscriptions)
+ if (nlk_sk(sk)->subscriptions) {
__sk_del_bind_node(sk);
+ netlink_update_listeners(sk);
+ }
netlink_table_ungrab();
}
@@ -1159,8 +1141,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct module *module = NULL;
struct mutex *cb_mutex;
struct netlink_sock *nlk;
- int (*bind)(int group);
- void (*unbind)(int group);
+ int (*bind)(struct net *net, int group);
+ void (*unbind)(struct net *net, int group);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -1246,8 +1228,8 @@ static int netlink_release(struct socket *sock)
module_put(nlk->module);
- netlink_table_grab();
if (netlink_is_kernel(sk)) {
+ netlink_table_grab();
BUG_ON(nl_table[sk->sk_protocol].registered == 0);
if (--nl_table[sk->sk_protocol].registered == 0) {
struct listeners *old;
@@ -1261,11 +1243,16 @@ static int netlink_release(struct socket *sock)
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
- } else if (nlk->subscriptions) {
- netlink_update_listeners(sk);
+ netlink_table_ungrab();
}
- netlink_table_ungrab();
+ if (nlk->netlink_unbind) {
+ int i;
+
+ for (i = 0; i < nlk->ngroups; i++)
+ if (test_bit(i, nlk->groups))
+ nlk->netlink_unbind(sock_net(sk), i + 1);
+ }
kfree(nlk->groups);
nlk->groups = NULL;
@@ -1430,9 +1417,10 @@ static int netlink_realloc_groups(struct sock *sk)
return err;
}
-static void netlink_unbind(int group, long unsigned int groups,
- struct netlink_sock *nlk)
+static void netlink_undo_bind(int group, long unsigned int groups,
+ struct sock *sk)
{
+ struct netlink_sock *nlk = nlk_sk(sk);
int undo;
if (!nlk->netlink_unbind)
@@ -1440,7 +1428,7 @@ static void netlink_unbind(int group, long unsigned int groups,
for (undo = 0; undo < group; undo++)
if (test_bit(undo, &groups))
- nlk->netlink_unbind(undo);
+ nlk->netlink_unbind(sock_net(sk), undo);
}
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
@@ -1478,10 +1466,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
for (group = 0; group < nlk->ngroups; group++) {
if (!test_bit(group, &groups))
continue;
- err = nlk->netlink_bind(group);
+ err = nlk->netlink_bind(net, group);
if (!err)
continue;
- netlink_unbind(group, groups, nlk);
+ netlink_undo_bind(group, groups, sk);
return err;
}
}
@@ -1491,7 +1479,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
if (err) {
- netlink_unbind(nlk->ngroups, groups, nlk);
+ netlink_undo_bind(nlk->ngroups, groups, sk);
return err;
}
}
@@ -2142,7 +2130,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
if (!val || val - 1 >= nlk->ngroups)
return -EINVAL;
if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
- err = nlk->netlink_bind(val);
+ err = nlk->netlink_bind(sock_net(sk), val);
if (err)
return err;
}
@@ -2151,7 +2139,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
optname == NETLINK_ADD_MEMBERSHIP);
netlink_table_ungrab();
if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
- nlk->netlink_unbind(val);
+ nlk->netlink_unbind(sock_net(sk), val);
err = 0;
break;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index b20a1731759b..f123a88496f8 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -39,8 +39,8 @@ struct netlink_sock {
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
- int (*netlink_bind)(int group);
- void (*netlink_unbind)(int group);
+ int (*netlink_bind)(struct net *net, int group);
+ void (*netlink_unbind)(struct net *net, int group);
struct module *module;
#ifdef CONFIG_NETLINK_MMAP
struct mutex pg_vec_lock;
@@ -65,8 +65,8 @@ struct netlink_table {
unsigned int groups;
struct mutex *cb_mutex;
struct module *module;
- int (*bind)(int group);
- void (*unbind)(int group);
+ int (*bind)(struct net *net, int group);
+ void (*unbind)(struct net *net, int group);
bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 76393f2f4b22..2e11061ef885 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -983,11 +983,67 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
+static int genl_bind(struct net *net, int group)
+{
+ int i, err = 0;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (!f->netnsok && net != &init_net)
+ err = -ENOENT;
+ else if (f->mcast_bind)
+ err = f->mcast_bind(net, fam_grp);
+ else
+ err = 0;
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+
+ return err;
+}
+
+static void genl_unbind(struct net *net, int group)
+{
+ int i;
+ bool found = false;
+
+ down_read(&cb_lock);
+ for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+ struct genl_family *f;
+
+ list_for_each_entry(f, genl_family_chain(i), family_list) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (f->mcast_unbind)
+ f->mcast_unbind(net, fam_grp);
+ found = true;
+ break;
+ }
+ }
+ }
+ up_read(&cb_lock);
+
+ WARN_ON(!found);
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
+ .unbind = genl_unbind,
};
/* we'll bump the group number right afterwards */
diff --git a/net/nonet.c b/net/nonet.c
deleted file mode 100644
index b1a73fda9c12..000000000000
--- a/net/nonet.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * net/nonet.c
- *
- * Dummy functions to allow us to configure network support entirely
- * out of the kernel.
- *
- * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) Matthew Wilcox 2003
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
-{
- return -ENXIO;
-}
-
-const struct file_operations bad_sock_fops = {
- .owner = THIS_MODULE,
- .open = sock_no_open,
- .llseek = noop_llseek,
-};
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 764fdc39c63b..770064c83711 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -147,7 +147,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
- skb_set_inner_protocol(skb, skb->protocol);
+ if (!skb->inner_protocol)
+ skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
invalidate_flow_key(key);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 332b5a031739..4e9a5f035cbc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -83,8 +83,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
unsigned int group)
{
return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
- genl_has_listeners(family, genl_info_net(info)->genl_sock,
- group);
+ genl_has_listeners(family, genl_info_net(info), group);
}
static void ovs_notify(struct genl_family *family,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 70bef2ab7f2b..da2fae0873a5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -70,6 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
{
struct flow_stats *stats;
int node = numa_node_id();
+ int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
stats = rcu_dereference(flow->stats[node]);
@@ -105,7 +106,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
if (likely(new_stats)) {
new_stats->used = jiffies;
new_stats->packet_count = 1;
- new_stats->byte_count = skb->len;
+ new_stats->byte_count = len;
new_stats->tcp_flags = tcp_flags;
spin_lock_init(&new_stats->lock);
@@ -120,7 +121,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
stats->used = jiffies;
stats->packet_count++;
- stats->byte_count += skb->len;
+ stats->byte_count += len;
stats->tcp_flags |= tcp_flags;
unlock:
spin_unlock(&stats->lock);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 9645a21d9eaa..d1eecf707613 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1753,7 +1753,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
__be16 eth_type, __be16 vlan_tci, bool log)
{
const struct nlattr *a;
- bool out_tnl_port = false;
int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH)
@@ -1796,8 +1795,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL;
- out_tnl_port = false;
-
break;
case OVS_ACTION_ATTR_HASH: {
@@ -1832,12 +1829,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
- /* Networking stack do not allow simultaneous Tunnel
- * and MPLS GSO.
- */
- if (out_tnl_port)
- return -EINVAL;
-
if (!eth_p_mpls(mpls->mpls_ethertype))
return -EINVAL;
/* Prohibit push MPLS other than to a white list
@@ -1873,11 +1864,9 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
- &out_tnl_port, eth_type, log);
+ &skip_copy, eth_type, log);
if (err)
return err;
-
- skip_copy = out_tnl_port;
break;
case OVS_ACTION_ATTR_SAMPLE:
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 347fa2325b22..484864dd0e68 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -219,7 +219,10 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
false);
if (err < 0)
ip_rt_put(rt);
+ return err;
+
error:
+ kfree_skb(skb);
return err;
}
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 6b69df545b1d..d4168c442db5 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -73,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb,
skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
if (IS_ERR(skb))
- return NULL;
+ return skb;
tpi.flags = filter_tnl_flags(tun_key->tun_flags);
tpi.proto = htons(ETH_P_TEB);
@@ -144,7 +144,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
err = -EINVAL;
- goto error;
+ goto err_free_skb;
}
tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
@@ -157,8 +157,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
fl.flowi4_proto = IPPROTO_GRE;
rt = ip_route_output_key(net, &fl);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto err_free_skb;
+ }
tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
@@ -183,8 +185,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
/* Push Tunnel header. */
skb = __build_header(skb, tunnel_hlen);
- if (unlikely(!skb)) {
- err = 0;
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ skb = NULL;
goto err_free_rt;
}
@@ -198,7 +201,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
err_free_rt:
ip_rt_put(rt);
-error:
+err_free_skb:
+ kfree_skb(skb);
return err;
}
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 38f95a52241b..d7c46b301024 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -187,7 +187,9 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
false);
if (err < 0)
ip_rt_put(rt);
+ return err;
error:
+ kfree_skb(skb);
return err;
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 9584526c0778..2034c6d9cb5a 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -480,7 +480,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
- stats->rx_bytes += skb->len;
+ stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
u64_stats_update_end(&stats->syncp);
OVS_CB(skb)->input_vport = vport;
@@ -519,10 +519,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
u64_stats_update_end(&stats->syncp);
} else if (sent < 0) {
ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
- kfree_skb(skb);
- } else
+ } else {
ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-
+ }
return sent;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e52a44785681..6880f34a529a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -785,6 +785,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
struct tpacket3_hdr *last_pkt;
struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
+ struct sock *sk = &po->sk;
if (po->stats.stats3.tp_drops)
status |= TP_STATUS_LOSING;
@@ -809,6 +810,8 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
/* Flush the block */
prb_flush_block(pkc1, pbd1, status);
+ sk->sk_data_ready(sk);
+
pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
}
@@ -2052,12 +2055,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
smp_wmb();
#endif
- if (po->tp_version <= TPACKET_V2)
+ if (po->tp_version <= TPACKET_V2) {
__packet_set_status(po, h.raw, status);
- else
+ sk->sk_data_ready(sk);
+ } else {
prb_clear_blk_fill_status(&po->rx_ring);
-
- sk->sk_data_ready(sk);
+ }
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
diff --git a/net/rds/message.c b/net/rds/message.c
index ff2202218187..5a21e6f5986f 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -325,7 +325,8 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
copied = 0;
while (iov_iter_count(to) && copied < len) {
- to_copy = min(iov_iter_count(to), sg->length - vec_off);
+ to_copy = min_t(unsigned long, iov_iter_count(to),
+ sg->length - vec_off);
to_copy = min_t(unsigned long, to_copy, len - copied);
rds_stats_add(s_copy_to_user, to_copy);
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 2a4717967502..3f4a0bbeed3d 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -183,7 +183,6 @@ static struct platform_driver rfkill_gpio_driver = {
.remove = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
- .owner = THIS_MODULE,
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
},
};
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index cf5b145902e5..50cd26a48e87 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -142,7 +142,6 @@ static struct platform_driver rfkill_regulator_driver = {
.remove = rfkill_regulator_remove,
.driver = {
.name = "rfkill-regulator",
- .owner = THIS_MODULE,
},
};
diff --git a/net/socket.c b/net/socket.c
index 8809afccf7fa..a2c33a4dc7ba 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -113,7 +113,6 @@ unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
-static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -151,7 +150,6 @@ static const struct file_operations socket_file_ops = {
.compat_ioctl = compat_sock_ioctl,
#endif
.mmap = sock_mmap,
- .open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
@@ -374,7 +372,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
- SOCK_INODE(sock)->i_fop = &socket_file_ops;
file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
@@ -559,23 +556,6 @@ static struct socket *sock_alloc(void)
return sock;
}
-/*
- * In theory you can't get an open on this inode, but /proc provides
- * a back door. Remember to keep it shut otherwise you'll let the
- * creepy crawlies in.
- */
-
-static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
-{
- return -ENXIO;
-}
-
-const struct file_operations bad_sock_fops = {
- .owner = THIS_MODULE,
- .open = sock_no_open,
- .llseek = noop_llseek,
-};
-
/**
* sock_release - close a socket
* @sock: socket to close
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index de856ddf5fed..224a82f24d3c 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
u32 priv_len, maj_stat;
int pad, saved_len, remaining_len, offset;
- rqstp->rq_splice_ok = false;
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 066362141133..33fb105d4352 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -20,6 +20,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/ctype.h>
+#include <linux/string_helpers.h>
#include <asm/uaccess.h>
#include <linux/poll.h>
#include <linux/seq_file.h>
@@ -1067,30 +1068,15 @@ void qword_add(char **bpp, int *lp, char *str)
{
char *bp = *bpp;
int len = *lp;
- char c;
+ int ret;
if (len < 0) return;
- while ((c=*str++) && len)
- switch(c) {
- case ' ':
- case '\t':
- case '\n':
- case '\\':
- if (len >= 4) {
- *bp++ = '\\';
- *bp++ = '0' + ((c & 0300)>>6);
- *bp++ = '0' + ((c & 0070)>>3);
- *bp++ = '0' + ((c & 0007)>>0);
- }
- len -= 4;
- break;
- default:
- *bp++ = c;
- len--;
- }
- if (c || len <1) len = -1;
+ ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t");
+ if (ret < 0 || ret == len)
+ len = -1;
else {
+ len -= ret;
*bp++ = ' ';
len--;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2783fd80c229..91eaef1844c8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -191,7 +191,7 @@ svc_pool_map_init_percpu(struct svc_pool_map *m)
return err;
for_each_online_cpu(cpu) {
- BUG_ON(pidx > maxpools);
+ BUG_ON(pidx >= maxpools);
m->to_pool[cpu] = pidx;
m->pool_to[pidx] = cpu;
pidx++;
@@ -476,15 +476,11 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
i, serv->sv_name);
pool->sp_id = i;
- INIT_LIST_HEAD(&pool->sp_threads);
INIT_LIST_HEAD(&pool->sp_sockets);
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
}
- if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown))
- serv->sv_shutdown = svc_rpcb_cleanup;
-
return serv;
}
@@ -505,13 +501,15 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, shutdown);
+ if (!serv)
+ goto out_err;
- if (serv != NULL) {
- serv->sv_function = func;
- serv->sv_module = mod;
- }
-
+ serv->sv_function = func;
+ serv->sv_module = mod;
return serv;
+out_err:
+ svc_pool_map_put();
+ return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
@@ -615,12 +613,14 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
goto out_enomem;
serv->sv_nrthreads++;
+ __set_bit(RQ_BUSY, &rqstp->rq_flags);
+ spin_lock_init(&rqstp->rq_lock);
+ rqstp->rq_server = serv;
+ rqstp->rq_pool = pool;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
- list_add(&rqstp->rq_all, &pool->sp_all_threads);
+ list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
spin_unlock_bh(&pool->sp_lock);
- rqstp->rq_server = serv;
- rqstp->rq_pool = pool;
rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
if (!rqstp->rq_argp)
@@ -685,7 +685,8 @@ found_pool:
* so we don't try to kill it again.
*/
rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
- list_del_init(&rqstp->rq_all);
+ set_bit(RQ_VICTIM, &rqstp->rq_flags);
+ list_del_rcu(&rqstp->rq_all);
task = rqstp->rq_task;
}
spin_unlock_bh(&pool->sp_lock);
@@ -783,10 +784,11 @@ svc_exit_thread(struct svc_rqst *rqstp)
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads--;
- list_del(&rqstp->rq_all);
+ if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
+ list_del_rcu(&rqstp->rq_all);
spin_unlock_bh(&pool->sp_lock);
- kfree(rqstp);
+ kfree_rcu(rqstp, rq_rcu_head);
/* Release the server */
if (serv)
@@ -1086,10 +1088,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_short_len;
/* Will be turned off only in gss privacy case: */
- rqstp->rq_splice_ok = true;
+ set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- rqstp->rq_usedeferral = true;
- rqstp->rq_dropme = false;
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_DROPME, &rqstp->rq_flags);
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1189,7 +1191,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
/* Encode reply */
- if (rqstp->rq_dropme) {
+ if (test_bit(RQ_DROPME, &rqstp->rq_flags)) {
if (procp->pc_release)
procp->pc_release(rqstp, NULL, rqstp->rq_resp);
goto dropit;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bbb3b044b877..c69358b3cf7f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -220,9 +220,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
*/
static void svc_xprt_received(struct svc_xprt *xprt)
{
- WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags));
- if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
+ if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
+ WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
return;
+ }
+
/* As soon as we clear busy, the xprt could be closed and
* 'put', so we need a reference to call svc_xprt_do_enqueue with:
*/
@@ -310,25 +312,6 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
}
EXPORT_SYMBOL_GPL(svc_print_addr);
-/*
- * Queue up an idle server thread. Must have pool->sp_lock held.
- * Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't pollute
- * the cache.
- */
-static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
-{
- list_add(&rqstp->rq_list, &pool->sp_threads);
-}
-
-/*
- * Dequeue an nfsd thread. Must have pool->sp_lock held.
- */
-static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
-{
- list_del(&rqstp->rq_list);
-}
-
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
{
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
@@ -341,11 +324,12 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
- struct svc_rqst *rqstp;
+ struct svc_rqst *rqstp = NULL;
int cpu;
+ bool queued = false;
if (!svc_xprt_has_something_to_do(xprt))
- return;
+ goto out;
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
@@ -355,43 +339,69 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
/* Don't enqueue transport while already enqueued */
dprintk("svc: transport %p busy, not enqueued\n", xprt);
- return;
+ goto out;
}
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
- spin_lock_bh(&pool->sp_lock);
- pool->sp_stats.packets++;
-
- if (!list_empty(&pool->sp_threads)) {
- rqstp = list_entry(pool->sp_threads.next,
- struct svc_rqst,
- rq_list);
- dprintk("svc: transport %p served by daemon %p\n",
- xprt, rqstp);
- svc_thread_dequeue(pool, rqstp);
- if (rqstp->rq_xprt)
- printk(KERN_ERR
- "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
- rqstp, rqstp->rq_xprt);
- /* Note the order of the following 3 lines:
- * We want to assign xprt to rqstp->rq_xprt only _after_
- * we've woken up the process, so that we don't race with
- * the lockless check in svc_get_next_xprt().
+ atomic_long_inc(&pool->sp_stats.packets);
+
+redo_search:
+ /* find a thread for this xprt */
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ /* Do a lockless check first */
+ if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ /*
+ * Once the xprt has been queued, it can only be dequeued by
+ * the task that intends to service it. All we can do at that
+ * point is to try to wake this thread back up so that it can
+ * do so.
*/
- svc_xprt_get(xprt);
+ if (!queued) {
+ spin_lock_bh(&rqstp->rq_lock);
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
+ /* already busy, move on... */
+ spin_unlock_bh(&rqstp->rq_lock);
+ continue;
+ }
+
+ /* this one will do */
+ rqstp->rq_xprt = xprt;
+ svc_xprt_get(xprt);
+ spin_unlock_bh(&rqstp->rq_lock);
+ }
+ rcu_read_unlock();
+
+ atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
- rqstp->rq_xprt = xprt;
- pool->sp_stats.threads_woken++;
- } else {
+ put_cpu();
+ goto out;
+ }
+ rcu_read_unlock();
+
+ /*
+ * We didn't find an idle thread to use, so we need to queue the xprt.
+ * Do so and then search again. If we find one, we can't hook this one
+ * up to it directly but we can wake the thread up in the hopes that it
+ * will pick it up once it searches for a xprt to service.
+ */
+ if (!queued) {
+ queued = true;
dprintk("svc: transport %p put into queue\n", xprt);
+ spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
+ spin_unlock_bh(&pool->sp_lock);
+ goto redo_search;
}
-
- spin_unlock_bh(&pool->sp_lock);
+ rqstp = NULL;
put_cpu();
+out:
+ trace_svc_xprt_do_enqueue(xprt, rqstp);
}
/*
@@ -408,22 +418,28 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
/*
- * Dequeue the first transport. Must be called with the pool->sp_lock held.
+ * Dequeue the first transport, if there is one.
*/
static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
{
- struct svc_xprt *xprt;
+ struct svc_xprt *xprt = NULL;
if (list_empty(&pool->sp_sockets))
- return NULL;
-
- xprt = list_entry(pool->sp_sockets.next,
- struct svc_xprt, xpt_ready);
- list_del_init(&xprt->xpt_ready);
+ goto out;
- dprintk("svc: transport %p dequeued, inuse=%d\n",
- xprt, atomic_read(&xprt->xpt_ref.refcount));
+ spin_lock_bh(&pool->sp_lock);
+ if (likely(!list_empty(&pool->sp_sockets))) {
+ xprt = list_first_entry(&pool->sp_sockets,
+ struct svc_xprt, xpt_ready);
+ list_del_init(&xprt->xpt_ready);
+ svc_xprt_get(xprt);
+ dprintk("svc: transport %p dequeued, inuse=%d\n",
+ xprt, atomic_read(&xprt->xpt_ref.refcount));
+ }
+ spin_unlock_bh(&pool->sp_lock);
+out:
+ trace_svc_xprt_dequeue(xprt);
return xprt;
}
@@ -484,34 +500,36 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
}
/*
- * External function to wake up a server waiting for data
- * This really only makes sense for services like lockd
- * which have exactly one thread anyway.
+ * Some svc_serv's will have occasional work to do, even when a xprt is not
+ * waiting to be serviced. This function is there to "kick" a task in one of
+ * those services so that it can wake up and do that work. Note that we only
+ * bother with pool 0 as we don't need to wake up more than one thread for
+ * this purpose.
*/
void svc_wake_up(struct svc_serv *serv)
{
struct svc_rqst *rqstp;
- unsigned int i;
struct svc_pool *pool;
- for (i = 0; i < serv->sv_nrpools; i++) {
- pool = &serv->sv_pools[i];
+ pool = &serv->sv_pools[0];
- spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_threads)) {
- rqstp = list_entry(pool->sp_threads.next,
- struct svc_rqst,
- rq_list);
- dprintk("svc: daemon %p woken up.\n", rqstp);
- /*
- svc_thread_dequeue(pool, rqstp);
- rqstp->rq_xprt = NULL;
- */
- wake_up_process(rqstp->rq_task);
- } else
- pool->sp_task_pending = 1;
- spin_unlock_bh(&pool->sp_lock);
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ /* skip any that aren't queued */
+ if (test_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+ rcu_read_unlock();
+ dprintk("svc: daemon %p woken up.\n", rqstp);
+ wake_up_process(rqstp->rq_task);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ return;
}
+ rcu_read_unlock();
+
+ /* No free entries available */
+ set_bit(SP_TASK_PENDING, &pool->sp_flags);
+ smp_wmb();
+ trace_svc_wake_up(0);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -622,75 +640,86 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
return 0;
}
+static bool
+rqst_should_sleep(struct svc_rqst *rqstp)
+{
+ struct svc_pool *pool = rqstp->rq_pool;
+
+ /* did someone call svc_wake_up? */
+ if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ return false;
+
+ /* was a socket queued? */
+ if (!list_empty(&pool->sp_sockets))
+ return false;
+
+ /* are we shutting down? */
+ if (signalled() || kthread_should_stop())
+ return false;
+
+ /* are we freezing? */
+ if (freezing(current))
+ return false;
+
+ return true;
+}
+
static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
long time_left = 0;
+ /* rq_xprt should be clear on entry */
+ WARN_ON_ONCE(rqstp->rq_xprt);
+
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
rqstp->rq_chandle.thread_wait = 5*HZ;
- spin_lock_bh(&pool->sp_lock);
xprt = svc_xprt_dequeue(pool);
if (xprt) {
rqstp->rq_xprt = xprt;
- svc_xprt_get(xprt);
/* As there is a shortage of threads and this request
* had to be queued, don't allow the thread to wait so
* long for cache updates.
*/
rqstp->rq_chandle.thread_wait = 1*HZ;
- pool->sp_task_pending = 0;
- } else {
- if (pool->sp_task_pending) {
- pool->sp_task_pending = 0;
- xprt = ERR_PTR(-EAGAIN);
- goto out;
- }
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
+ return xprt;
+ }
- /* No data pending. Go to sleep */
- svc_thread_enqueue(pool, rqstp);
- spin_unlock_bh(&pool->sp_lock);
+ /*
+ * We have to be able to interrupt this wait
+ * to bring down the daemons ...
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ clear_bit(RQ_BUSY, &rqstp->rq_flags);
+ smp_mb();
- if (!(signalled() || kthread_should_stop())) {
- time_left = schedule_timeout(timeout);
- __set_current_state(TASK_RUNNING);
+ if (likely(rqst_should_sleep(rqstp)))
+ time_left = schedule_timeout(timeout);
+ else
+ __set_current_state(TASK_RUNNING);
- try_to_freeze();
+ try_to_freeze();
- xprt = rqstp->rq_xprt;
- if (xprt != NULL)
- return xprt;
- } else
- __set_current_state(TASK_RUNNING);
+ spin_lock_bh(&rqstp->rq_lock);
+ set_bit(RQ_BUSY, &rqstp->rq_flags);
+ spin_unlock_bh(&rqstp->rq_lock);
- spin_lock_bh(&pool->sp_lock);
- if (!time_left)
- pool->sp_stats.threads_timedout++;
+ xprt = rqstp->rq_xprt;
+ if (xprt != NULL)
+ return xprt;
- xprt = rqstp->rq_xprt;
- if (!xprt) {
- svc_thread_dequeue(pool, rqstp);
- spin_unlock_bh(&pool->sp_lock);
- dprintk("svc: server %p, no data yet\n", rqstp);
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- else
- return ERR_PTR(-EAGAIN);
- }
- }
-out:
- spin_unlock_bh(&pool->sp_lock);
- return xprt;
+ if (!time_left)
+ atomic_long_inc(&pool->sp_stats.threads_timedout);
+
+ if (signalled() || kthread_should_stop())
+ return ERR_PTR(-EINTR);
+ return ERR_PTR(-EAGAIN);
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -719,7 +748,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
dprintk("svc_recv: found XPT_CLOSE\n");
svc_delete_xprt(xprt);
/* Leave XPT_BUSY set on the dead xprt: */
- return 0;
+ goto out;
}
if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
struct svc_xprt *newxpt;
@@ -750,6 +779,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
}
/* clear XPT_BUSY: */
svc_xprt_received(xprt);
+out:
+ trace_svc_handle_xprt(xprt, len);
return len;
}
@@ -797,7 +828,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
+ if (xprt->xpt_ops->xpo_secure_port(rqstp))
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
rqstp->rq_chandle.defer = svc_defer;
rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
@@ -895,7 +929,6 @@ static void svc_age_temp_xprts(unsigned long closure)
continue;
list_del_init(le);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
- set_bit(XPT_DETACHED, &xprt->xpt_flags);
dprintk("queuing xprt %p for closing\n", xprt);
/* a thread will dequeue and close it soon */
@@ -935,8 +968,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
xprt->xpt_ops->xpo_detach(xprt);
spin_lock_bh(&serv->sv_lock);
- if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
- list_del_init(&xprt->xpt_list);
+ list_del_init(&xprt->xpt_list);
WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
if (test_bit(XPT_TEMP, &xprt->xpt_flags))
serv->sv_tmpcnt--;
@@ -1080,7 +1112,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
struct svc_deferred_req *dr;
- if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
+ if (rqstp->rq_arg.page_len || !test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags))
return NULL; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred;
@@ -1109,7 +1141,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
}
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- rqstp->rq_dropme = true;
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
@@ -1311,10 +1343,10 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
seq_printf(m, "%u %lu %lu %lu %lu\n",
pool->sp_id,
- pool->sp_stats.packets,
+ (unsigned long)atomic_long_read(&pool->sp_stats.packets),
pool->sp_stats.sockets_queued,
- pool->sp_stats.threads_woken,
- pool->sp_stats.threads_timedout);
+ (unsigned long)atomic_long_read(&pool->sp_stats.threads_woken),
+ (unsigned long)atomic_long_read(&pool->sp_stats.threads_timedout));
return 0;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f9c052d508f0..cc331b6cf573 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1145,7 +1145,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
- rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
+ if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
+ set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ else
+ clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 290af97bf6f9..1cb61242e55e 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -617,9 +617,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
fraglen = min_t(int, buf->len - len, tail->iov_len);
tail->iov_len -= fraglen;
buf->len -= fraglen;
- if (tail->iov_len && buf->len == len) {
+ if (tail->iov_len) {
xdr->p = tail->iov_base + tail->iov_len;
- /* xdr->end, xdr->iov should be set already */
+ WARN_ON_ONCE(!xdr->end);
+ WARN_ON_ONCE(!xdr->iov);
return;
}
WARN_ON_ONCE(fraglen);
@@ -631,11 +632,11 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
old = new + fraglen;
xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
- if (buf->page_len && buf->len == len) {
+ if (buf->page_len) {
xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + PAGE_SIZE;
xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
- /* xdr->iov should already be NULL */
+ WARN_ON_ONCE(xdr->iov);
return;
}
if (fraglen) {