summaryrefslogtreecommitdiff
path: root/net/sched/act_ct.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/act_ct.c')
-rw-r--r--net/sched/act_ct.c141
1 files changed, 56 insertions, 85 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0ca2bb8ed026..9cc0bc7c71ed 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -170,11 +170,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
+ enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
- enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
@@ -182,8 +182,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
- ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
+ enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ if (ctinfo == IP_CT_ESTABLISHED)
+ set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
- tcf_ct_flow_table_add_action_meta(ct, dir, action);
+ tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
- bool tcp)
+ bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ if (bidirectional)
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
@@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- bool tcp = false;
-
- if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
- !test_bit(IPS_ASSURED_BIT, &ct->status))
- return;
+ bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
- tcp = true;
- if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
+
+ tcp = true;
break;
case IPPROTO_UDP:
+ if (!nf_ct_is_confirmed(ct))
+ return;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ bidirectional = false;
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: {
struct nf_conntrack_tuple *tuple;
- if (ct->status & IPS_NAT_MASK)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->status & IPS_NAT_MASK)
return;
+
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* No support for GRE v1 */
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
- tcf_ct_flow_table_add(ct_ft, ct, tcp);
+ tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
+ if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+ !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+ /* Only offload reply direction after connection became
+ * assured.
+ */
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+ else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+ /* If flow_table flow has already been updated to the
+ * established state, then don't refresh.
+ */
+ return false;
+ }
+
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
- ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
+ if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ else
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
@@ -695,31 +726,6 @@ drop_ct:
return false;
}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
-{
- unsigned int len;
-
- switch (family) {
- case NFPROTO_IPV4:
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case NFPROTO_IPV6:
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- return pskb_trim_rcsum(skb, len);
-}
-
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
u8 family = NFPROTO_UNSPEC;
@@ -779,6 +785,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
struct nf_conn *ct;
int err = 0;
bool frag;
+ u8 proto;
u16 mru;
/* Previously seen (loopback)? Ignore. */
@@ -794,50 +801,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
- mru = tc_skb_cb(skb)->mru;
-
- if (family == NFPROTO_IPV4) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- local_bh_disable();
- err = ip_defrag(net, skb, user);
- local_bh_enable();
- if (err && err != -EINPROGRESS)
- return err;
-
- if (!err) {
- *defrag = true;
- mru = IPCB(skb)->frag_max_size;
- }
- } else { /* NFPROTO_IPV6 */
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err && err != -EINPROGRESS)
- goto out_free;
-
- if (!err) {
- *defrag = true;
- mru = IP6CB(skb)->frag_max_size;
- }
-#else
- err = -EOPNOTSUPP;
- goto out_free;
-#endif
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
+ if (err)
+ return err;
- if (err != -EINPROGRESS)
- tc_skb_cb(skb)->mru = mru;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
- return err;
+ *defrag = true;
+ tc_skb_cb(skb)->mru = mru;
-out_free:
- kfree_skb(skb);
- return err;
+ return 0;
}
static void tcf_ct_params_free(struct tcf_ct_params *params)
@@ -980,7 +951,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
- err = tcf_ct_skb_network_trim(skb, family);
+ err = nf_ct_skb_network_trim(skb, family);
if (err)
goto drop;