summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig3
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/dsa/tag_8021q.c43
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_sja1105.c19
-rw-r--r--net/ethtool/debug.c4
-rw-r--r--net/ethtool/linkinfo.c4
-rw-r--r--net/ethtool/linkmodes.c4
-rw-r--r--net/ethtool/wol.c4
-rw-r--r--net/hsr/hsr_slave.c8
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c14
-rw-r--r--net/netfilter/nf_flow_table_offload.c1
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nft_fwd_netdev.c12
-rw-r--r--net/netfilter/nft_set_pipapo.c34
-rw-r--r--net/netfilter/nft_set_rbtree.c87
-rw-r--r--net/rxrpc/af_rxrpc.c37
-rw-r--r--net/rxrpc/ar-internal.h5
-rw-r--r--net/rxrpc/call_object.c3
-rw-r--r--net/rxrpc/conn_client.c13
-rw-r--r--net/rxrpc/input.c1
-rw-r--r--net/rxrpc/sendmsg.c75
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/sch_cbs.c12
-rw-r--r--net/socket.c8
31 files changed, 277 insertions, 156 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 2eeb0e55f7c9..df8d8c9bd021 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,9 @@ config NET_INGRESS
config NET_EGRESS
bool
+config NET_REDIRECT
+ bool
+
config SKB_EXTENSIONS
bool
diff --git a/net/core/dev.c b/net/core/dev.c
index 402a986659cf..500bba8874b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4516,7 +4516,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
*/
- if (skb_is_tc_redirected(skb))
+ if (skb_is_redirected(skb))
return XDP_PASS;
/* XDP packets must be linear and must have sufficient headroom
@@ -5063,7 +5063,7 @@ skip_taps:
goto out;
}
#endif
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
skip_classify:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index acc849df60b5..d0641bba6b81 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3362,7 +3362,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* skb was 'freed' by stack, so clean few
* bits and reuse it
*/
- skb_reset_tc(skb);
+ skb_reset_redirect(skb);
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 2fb6c26294b5..b97ad93d1c1a 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -298,47 +298,4 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-/* In the DSA packet_type handler, skb->data points in the middle of the VLAN
- * tag, after tpid and before tci. This is because so far, ETH_HLEN
- * (DMAC, SMAC, EtherType) bytes were pulled.
- * There are 2 bytes of VLAN tag left in skb->data, and upper
- * layers expect the 'real' EtherType to be consumed as well.
- * Coincidentally, a VLAN header is also of the same size as
- * the number of bytes that need to be pulled.
- *
- * skb_mac_header skb->data
- * | |
- * v v
- * | | | | | | | | | | | | | | | | | | |
- * +-----------------------+-----------------------+-------+-------+-------+
- * | Destination MAC | Source MAC | TPID | TCI | EType |
- * +-----------------------+-----------------------+-------+-------+-------+
- * ^ | |
- * |<--VLAN_HLEN-->to <---VLAN_HLEN--->
- * from |
- * >>>>>>> v
- * >>>>>>> | | | | | | | | | | | | | | |
- * >>>>>>> +-----------------------+-----------------------+-------+
- * >>>>>>> | Destination MAC | Source MAC | EType |
- * +-----------------------+-----------------------+-------+
- * ^ ^
- * (now part of | |
- * skb->head) skb_mac_header skb->data
- */
-struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb)
-{
- u8 *from = skb_mac_header(skb);
- u8 *dest = from + VLAN_HLEN;
-
- memmove(dest, from, ETH_HLEN - VLAN_HLEN);
- skb_pull(skb, VLAN_HLEN);
- skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
- skb_pull_rcsum(skb, ETH_HLEN);
-
- return skb;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_remove_header);
-
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 9c3114179690..9169b63a89e3 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -140,6 +140,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
+ skb->offload_fwd_mark = 1;
+
return skb;
}
#endif
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 5366ea430349..d553bf36bd41 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -250,14 +250,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
{
struct sja1105_meta meta = {0};
int source_port, switch_id;
- struct vlan_ethhdr *hdr;
+ struct ethhdr *hdr;
u16 tpid, vid, tci;
bool is_link_local;
bool is_tagged;
bool is_meta;
- hdr = vlan_eth_hdr(skb);
- tpid = ntohs(hdr->h_vlan_proto);
+ hdr = eth_hdr(skb);
+ tpid = ntohs(hdr->h_proto);
is_tagged = (tpid == ETH_P_SJA1105);
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
@@ -266,7 +266,12 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (is_tagged) {
/* Normal traffic path. */
- tci = ntohs(hdr->h_vlan_TCI);
+ skb_push_rcsum(skb, ETH_HLEN);
+ __skb_vlan_pop(skb, &tci);
+ skb_pull_rcsum(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+
vid = tci & VLAN_VID_MASK;
source_port = dsa_8021q_rx_source_port(vid);
switch_id = dsa_8021q_rx_switch_id(vid);
@@ -295,12 +300,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- /* Delete/overwrite fake VLAN header, DSA expects to not find
- * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN).
- */
- if (is_tagged)
- skb = dsa_8021q_remove_header(skb);
-
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
}
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index aaef4843e6ba..92599ad7b3c2 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -107,8 +107,9 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -129,6 +130,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5d16cb4e8693..6e9e0b590bb5 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -126,9 +126,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -162,6 +163,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 96f20be64553..18cc37be2d9c 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -338,9 +338,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_link_ksettings ||
!dev->ethtool_ops->set_link_ksettings)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -370,6 +371,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index e1b8a65b64c4..55e1ecaaf739 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -128,8 +128,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
dev = req_info.dev;
+ ret = -EOPNOTSUPP;
if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
- return -EOPNOTSUPP;
+ goto out_dev;
rtnl_lock();
ret = ethnl_ops_begin(dev);
@@ -172,6 +173,7 @@ out_ops:
ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
+out_dev:
dev_put(dev);
return ret;
}
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index fbfd0db182b7..a9104d42aafb 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -145,16 +145,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
if (!port)
return -ENOMEM;
+ port->hsr = hsr;
+ port->dev = dev;
+ port->type = type;
+
if (type != HSR_PT_MASTER) {
res = hsr_portdev_setup(dev, port);
if (res)
goto fail_dev_setup;
}
- port->hsr = hsr;
- port->dev = dev;
- port->type = type;
-
list_add_tail_rcu(&port->port_list, &hsr->ports);
synchronize_rcu();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 577db1d50a24..213be9c050ad 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
return -ENOENT;
}
+ rcu_read_lock();
err = fib_table_dump(tb, skb, cb, &filter);
+ rcu_read_unlock();
return skb->len ? : err;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eb2d80519f8e..dc77c303e6f7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
WRITE_ONCE(tp->write_seq, val);
- else if (tp->repair_queue == TCP_RECV_QUEUE)
+ else if (tp->repair_queue == TCP_RECV_QUEUE) {
WRITE_ONCE(tp->rcv_nxt, val);
+ WRITE_ONCE(tp->copied_seq, val);
+ }
else
err = -EINVAL;
break;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 306e25d743e8..2f45cde168c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
+ /* retransmit skbs might have a non zero value in skb->dev
+ * because skb->dev is aliased with skb->rbnode.rb_left
+ */
+ skb->dev = NULL;
}
inet = inet_sk(sk);
@@ -3037,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tcp_skb_tsorted_save(skb) {
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ if (nskb) {
+ nskb->dev = NULL;
+ err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC);
+ } else {
+ err = -ENOBUFS;
+ }
} tcp_skb_tsorted_restore(skb);
if (!err) {
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8af28e10b4e6..70ebebaf5bc1 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -554,6 +554,9 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
nf_flow_table_offload_flush(flow_table);
+ if (nf_flowtable_hw_offload(flow_table))
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
+ flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9e563fd3da0f..ba775aecd89a 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -146,11 +146,13 @@ static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
+
+ iph = ip_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+ nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -189,6 +191,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ iph = ip_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
@@ -426,11 +429,13 @@ static int nf_flow_nat_ipv6(const struct flow_offload *flow,
if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
+
+ ip6h = ipv6_hdr(skb);
if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
return -1;
return 0;
@@ -459,6 +464,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
+ ip6h = ipv6_hdr(skb);
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 06f00cdc3891..f2c22c682851 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -87,6 +87,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
default:
return -EOPNOTSUPP;
}
+ mask->control.addr_type = 0xffff;
match->dissector.used_keys |= BIT(key->control.addr_type);
mask->basic.n_proto = 0xffff;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 38c680f28f15..d11f1a74d43c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5082,6 +5082,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EBUSY;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
+ } else if (err == -ENOTEMPTY) {
+ /* ENOTEMPTY reports overlapping between this element
+ * and an existing one.
+ */
+ err = -EEXIST;
}
goto err_element_clash;
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index aba11c2333f3..3087e23297db 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -28,6 +28,9 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
+ /* This is used by ifb only. */
+ skb_set_redirected(pkt->skb, true);
+
nf_fwd_netdev_egress(pkt, oif);
regs->verdict.code = NF_STOLEN;
}
@@ -190,6 +193,13 @@ nla_put_failure:
return -1;
}
+static int nft_fwd_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
+}
+
static struct nft_expr_type nft_fwd_netdev_type;
static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.type = &nft_fwd_netdev_type,
@@ -197,6 +207,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.eval = nft_fwd_neigh_eval,
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
+ .validate = nft_fwd_validate,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = {
@@ -205,6 +216,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.eval = nft_fwd_netdev_eval,
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
+ .validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
};
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 4fc0c924ed5d..ef7e8ad2e344 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1098,21 +1098,41 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_field *f;
int i, bsize_max, err = 0;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+ else
+ end = start;
+
dup = pipapo_get(net, set, start, genmask);
- if (PTR_ERR(dup) == -ENOENT) {
- if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) {
- end = (const u8 *)nft_set_ext_key_end(ext)->data;
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
- } else {
- end = start;
+ if (!IS_ERR(dup)) {
+ /* Check if we already have the same exact entry */
+ const struct nft_data *dup_key, *dup_end;
+
+ dup_key = nft_set_ext_key(&dup->ext);
+ if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END))
+ dup_end = nft_set_ext_key_end(&dup->ext);
+ else
+ dup_end = dup_key;
+
+ if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
+ !memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
+ *ext2 = &dup->ext;
+ return -EEXIST;
}
+
+ return -ENOTEMPTY;
+ }
+
+ if (PTR_ERR(dup) == -ENOENT) {
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(net, set, end, nft_genmask_next(net));
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
*ext2 = &dup->ext;
- return -EEXIST;
+ return -ENOTEMPTY;
}
/* Validate */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5000b938ab1e..8617fc16a1ed 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
+static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
+{
+ return !nft_rbtree_interval_end(rbe);
+}
+
static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
const struct nft_rbtree_elem *interval)
{
@@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(interval))
+ nft_rbtree_interval_start(interval))
continue;
interval = rbe;
} else if (d > 0)
@@ -89,7 +94,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_interval_end(interval)) {
+ nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
@@ -208,8 +213,43 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ bool overlap = false;
int d;
+ /* Detect overlaps as we descend the tree. Set the flag in these cases:
+ *
+ * a1. |__ _ _? >|__ _ _ (insert start after existing start)
+ * a2. _ _ __>| ?_ _ __| (insert end before existing end)
+ * a3. _ _ ___| ?_ _ _>| (insert end after existing end)
+ * a4. >|__ _ _ _ _ __| (insert start before existing end)
+ *
+ * and clear it later on, as we eventually reach the points indicated by
+ * '?' above, in the cases described below. We'll always meet these
+ * later, locally, due to tree ordering, and overlaps for the intervals
+ * that are the closest together are always evaluated last.
+ *
+ * b1. |__ _ _! >|__ _ _ (insert start after existing end)
+ * b2. _ _ __>| !_ _ __| (insert end before existing start)
+ * b3. !_____>| (insert end after existing start)
+ *
+ * Case a4. resolves to b1.:
+ * - if the inserted start element is the leftmost, because the '0'
+ * element in the tree serves as end element
+ * - otherwise, if an existing end is found. Note that end elements are
+ * always inserted after corresponding start elements.
+ *
+ * For a new, rightmost pair of elements, we'll hit cases b1. and b3.,
+ * in that order.
+ *
+ * The flag is also cleared in two special cases:
+ *
+ * b4. |__ _ _!|<_ _ _ (insert start right before existing end)
+ * b5. |__ _ >|!__ _ _ (insert end right after existing start)
+ *
+ * which always happen as last step and imply that no further
+ * overlapping is possible.
+ */
+
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
@@ -218,17 +258,42 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
d = memcmp(nft_set_ext_key(&rbe->ext),
nft_set_ext_key(&new->ext),
set->klen);
- if (d < 0)
+ if (d < 0) {
p = &parent->rb_left;
- else if (d > 0)
+
+ if (nft_rbtree_interval_start(new)) {
+ overlap = nft_rbtree_interval_start(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ }
+ } else if (d > 0) {
p = &parent->rb_right;
- else {
+
+ if (nft_rbtree_interval_end(new)) {
+ overlap = nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext,
+ genmask);
+ } else if (nft_rbtree_interval_end(rbe) &&
+ nft_set_elem_active(&rbe->ext, genmask)) {
+ overlap = true;
+ }
+ } else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(new)) {
+ nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- } else if (!nft_rbtree_interval_end(rbe) &&
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
+
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ overlap = false;
} else if (nft_set_elem_active(&rbe->ext, genmask)) {
*ext = &rbe->ext;
return -EEXIST;
@@ -237,6 +302,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
+
+ if (overlap)
+ return -ENOTEMPTY;
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@@ -317,10 +386,10 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
else {
if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(this)) {
+ nft_rbtree_interval_start(this)) {
parent = parent->rb_left;
continue;
- } else if (!nft_rbtree_interval_end(rbe) &&
+ } else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fe42f986cd94..15ee92d79581 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -285,7 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
bool upgrade,
- bool intr,
+ enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
{
struct rxrpc_conn_parameters cp;
@@ -310,7 +310,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&p, 0, sizeof(p));
p.user_call_ID = user_call_ID;
p.tx_total_len = tx_total_len;
- p.intr = intr;
+ p.interruptibility = interruptibility;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
@@ -371,45 +371,18 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* rxrpc_kernel_check_life - Check to see whether a call is still alive
* @sock: The socket the call is on
* @call: The call to check
- * @_life: Where to store the life value
*
- * Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server. Passes back in *_life a number representing
- * the life state which can be compared to that returned by a previous call and
- * return true if the call is still alive.
- *
- * If the life state stalls, rxrpc_kernel_probe_life() should be called and
- * then 2RTT waited.
+ * Allow a kernel service to find out whether a call is still alive -
+ * ie. whether it has completed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
- const struct rxrpc_call *call,
- u32 *_life)
+ const struct rxrpc_call *call)
{
- *_life = call->acks_latest;
return call->state != RXRPC_CALL_COMPLETE;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
- * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
- * @sock: The socket the call is on
- * @call: The call to check
- *
- * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
- * find out whether a call is still alive by pinging it. This should cause the
- * life state to be bumped in about 2*RTT.
- *
- * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
- */
-void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
-{
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
- rxrpc_propose_ack_ping_for_check_life);
- rxrpc_send_ack_packet(call, true, NULL);
-}
-EXPORT_SYMBOL(rxrpc_kernel_probe_life);
-
-/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7d730c438404..3eb1ab40ca5c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -489,7 +489,6 @@ enum rxrpc_call_flag {
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */
- RXRPC_CALL_IS_INTR, /* The call is interruptible */
RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
};
@@ -598,6 +597,7 @@ struct rxrpc_call {
atomic_t usage;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
+ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
int debug_id; /* debug ID for printks */
@@ -675,7 +675,6 @@ struct rxrpc_call {
/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
- rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -721,7 +720,7 @@ struct rxrpc_call_params {
u32 normal; /* Max time since last call packet (msec) */
} timeouts;
u8 nr_timeouts; /* Number of timeouts specified */
- bool intr; /* The call is interruptible */
+ enum rxrpc_interruptibility interruptibility; /* How is interruptible is the call? */
};
struct rxrpc_send_params {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c9f34b0a11df..f07970207b54 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -237,8 +237,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
return call;
}
- if (p->intr)
- __set_bit(RXRPC_CALL_IS_INTR, &call->flags);
+ call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
atomic_read(&call->usage),
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index ea7d4c21f889..f2a1a5dbb5a7 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -655,13 +655,20 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags))
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
set_current_state(TASK_INTERRUPTIBLE);
- else
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
if (call->call_id)
break;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
signal_pending(current)) {
ret = -ERESTARTSYS;
break;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index ef10fbf71b15..69e09d69c896 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -882,7 +882,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
before(prev_pkt, call->ackr_prev_seq))
goto out;
call->acks_latest_ts = skb->tstamp;
- call->acks_latest = sp->hdr.serial;
call->ackr_first_seq = first_soft_ack;
call->ackr_prev_seq = prev_pkt;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 813fd6888142..0fcf157aa09f 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,21 @@
#include "ar-internal.h"
/*
+ * Return true if there's sufficient Tx queue space.
+ */
+static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
+{
+ unsigned int win_size =
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack);
+
+ if (_tx_win)
+ *_tx_win = tx_win;
+ return call->tx_top - tx_win < win_size;
+}
+
+/*
* Wait for space to appear in the Tx queue or a signal to occur.
*/
static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
@@ -26,9 +41,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, NULL))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
@@ -49,7 +62,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
* Wait for space to appear in the Tx queue uninterruptibly, but with
* a timeout of 2*RTT if no progress was made and a signal occurred.
*/
-static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
struct rxrpc_call *call)
{
rxrpc_seq_t tx_start, tx_win;
@@ -58,8 +71,8 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
rtt = READ_ONCE(call->peer->rtt);
rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 1)
- rtt2 = 1;
+ if (rtt2 < 2)
+ rtt2 = 2;
timeout = rtt2;
tx_start = READ_ONCE(call->tx_hard_ack);
@@ -68,16 +81,13 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
set_current_state(TASK_UNINTERRUPTIBLE);
tx_win = READ_ONCE(call->tx_hard_ack);
- if (call->tx_top - tx_win <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
+ if (rxrpc_check_tx_space(call, &tx_win))
return 0;
if (call->state >= RXRPC_CALL_COMPLETE)
return call->error;
- if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) &&
- timeout == 0 &&
+ if (timeout == 0 &&
tx_win == tx_start && signal_pending(current))
return -EINTR;
@@ -92,6 +102,26 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
}
/*
+ * Wait for space to appear in the Tx queue uninterruptibly.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (rxrpc_check_tx_space(call, NULL))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ *timeo = schedule_timeout(*timeo);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
@@ -108,10 +138,19 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- if (waitall)
- ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
- else
- ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_waitall(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ break;
+ case RXRPC_PREINTERRUPTIBLE:
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo);
+ break;
+ }
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -302,9 +341,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (call->tx_top - call->tx_hard_ack >=
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra)) {
+ if (!rxrpc_check_tx_space(call, NULL)) {
ret = -EAGAIN;
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
@@ -619,7 +656,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
.call.tx_total_len = -1,
.call.user_call_ID = 0,
.call.nr_timeouts = 0,
- .call.intr = true,
+ .call.interruptibility = RXRPC_INTERRUPTIBLE,
.abort_code = 0,
.command = RXRPC_CMD_SEND_DATA,
.exclusive = false,
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f685c0d73708..41114b463161 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -739,7 +739,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
if (params)
- kfree_rcu(params, rcu);
+ call_rcu(&params->rcu, tcf_ct_params_free);
if (res == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1ad300e6dbc0..83dd82fc9f40 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -284,10 +284,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
/* mirror is always swallowed */
if (is_redirect) {
- skb2->tc_redirected = 1;
- skb2->tc_from_ingress = skb2->tc_at_ingress;
- if (skb2->tc_from_ingress)
- skb2->tstamp = 0;
+ skb_set_redirected(skb2, skb2->tc_at_ingress);
+
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index b2905b03a432..2eaac2ff380f 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
s64 credits;
int len;
+ /* The previous packet is still being sent */
+ if (now < q->last) {
+ qdisc_watchdog_schedule_ns(&q->watchdog, q->last);
+ return NULL;
+ }
if (q->credits < 0) {
credits = timediff_to_credits(now - q->last, q->idleslope);
@@ -212,7 +217,12 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
credits += q->credits;
q->credits = max_t(s64, credits, q->locredit);
- q->last = now;
+ /* Estimate of the transmission of the last byte of the packet in ns */
+ if (unlikely(atomic64_read(&q->port_rate) == 0))
+ q->last = now;
+ else
+ q->last = now + div64_s64(len * NSEC_PER_SEC,
+ atomic64_read(&q->port_rate));
return skb;
}
diff --git a/net/socket.c b/net/socket.c
index b79a05de7c6e..2eecf1517f76 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
- newfd = get_unused_fd_flags(flags);
+ newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
@@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
- upeer_addrlen, flags);
+ upeer_addrlen, flags,
+ rlimit(RLIMIT_NOFILE));
if (f.flags)
fput(f.file);
}