diff options
Diffstat (limited to 'net/sched/act_ct.c')
| -rw-r--r-- | net/sched/act_ct.c | 373 |
1 files changed, 214 insertions, 159 deletions
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0ca2bb8ed026..2b6ac7069dc1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -41,21 +41,26 @@ static struct workqueue_struct *act_ct_wq; static struct rhashtable zones_ht; static DEFINE_MUTEX(zones_mutex); +struct zones_ht_key { + struct net *net; + u16 zone; +}; + struct tcf_ct_flow_table { struct rhash_head node; /* In zones tables */ struct rcu_work rwork; struct nf_flowtable nf_ft; refcount_t ref; - u16 zone; + struct zones_ht_key key; bool dying; }; static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), - .key_offset = offsetof(struct tcf_ct_flow_table, zone), - .key_len = sizeof_field(struct tcf_ct_flow_table, zone), + .key_offset = offsetof(struct tcf_ct_flow_table, key), + .key_len = offsetofend(struct zones_ht_key, zone), .automatic_shrinking = true, }; @@ -170,11 +175,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple, static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, enum ip_conntrack_dir dir, + enum ip_conntrack_info ctinfo, struct flow_action *action) { struct nf_conn_labels *ct_labels; struct flow_action_entry *entry; - enum ip_conntrack_info ctinfo; u32 *act_ct_labels; entry = tcf_ct_flow_table_flow_action_get_next(action); @@ -182,8 +187,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif - ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : - IP_CT_ESTABLISHED_REPLY; /* aligns with the CT reference on the SKB nf_ct_set */ entry->ct_metadata.cookie = (unsigned long)ct | ctinfo; entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL; @@ -237,22 +240,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net, } static int tcf_ct_flow_table_fill_actions(struct net *net, - const struct flow_offload *flow, + struct flow_offload *flow, enum flow_offload_tuple_dir tdir, struct nf_flow_rule *flow_rule) { struct flow_action *action = &flow_rule->rule->action; int num_entries = action->num_entries; struct nf_conn *ct = flow->ct; + enum ip_conntrack_info ctinfo; enum ip_conntrack_dir dir; int i, err; switch (tdir) { case FLOW_OFFLOAD_DIR_ORIGINAL: dir = IP_CT_DIR_ORIGINAL; + ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? + IP_CT_ESTABLISHED : IP_CT_NEW; + if (ctinfo == IP_CT_ESTABLISHED) + set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); break; case FLOW_OFFLOAD_DIR_REPLY: dir = IP_CT_DIR_REPLY; + ctinfo = IP_CT_ESTABLISHED_REPLY; break; default: return -EOPNOTSUPP; @@ -262,7 +271,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net, if (err) goto err_nat; - tcf_ct_flow_table_add_action_meta(ct, dir, action); + tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action); return 0; err_nat: @@ -274,18 +283,50 @@ err_nat: return err; } +static bool tcf_ct_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + test_bit(IPS_HW_OFFLOAD_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_PENDING, &flow->flags) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_get(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_get_ref(ct_ft); +} + +static void tcf_ct_flow_table_put(struct tcf_ct_flow_table *ct_ft); + +static void tcf_ct_nf_put(struct nf_flowtable *ft) +{ + struct tcf_ct_flow_table *ct_ft = + container_of(ft, struct tcf_ct_flow_table, nf_ft); + + tcf_ct_flow_table_put(ct_ft); +} + static struct nf_flowtable_type flowtable_ct = { + .gc = tcf_ct_flow_is_outdated, .action = tcf_ct_flow_table_fill_actions, + .get = tcf_ct_nf_get, + .put = tcf_ct_nf_put, .owner = THIS_MODULE, }; static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { + struct zones_ht_key key = { .net = net, .zone = params->zone }; struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, ¶ms->zone, zones_params); + ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) goto out_unlock; @@ -294,7 +335,7 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) goto err_alloc; refcount_set(&ct_ft->ref, 1); - ct_ft->zone = params->zone; + ct_ft->key = key; err = rhashtable_insert_fast(&zones_ht, &ct_ft->node, zones_params); if (err) goto err_insert; @@ -324,9 +365,13 @@ err_alloc: return err; } +static void tcf_ct_flow_table_get_ref(struct tcf_ct_flow_table *ct_ft) +{ + refcount_inc(&ct_ft->ref); +} + static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) { - struct flow_block_cb *block_cb, *tmp_cb; struct tcf_ct_flow_table *ct_ft; struct flow_block *block; @@ -334,13 +379,9 @@ static void tcf_ct_flow_table_cleanup_work(struct work_struct *work) rwork); nf_flow_table_free(&ct_ft->nf_ft); - /* Remove any remaining callbacks before cleanup */ block = &ct_ft->nf_ft.flow_block; down_write(&ct_ft->nf_ft.flow_block_lock); - list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } + WARN_ON(!list_empty(&block->cb_list)); up_write(&ct_ft->nf_ft.flow_block_lock); kfree(ct_ft); @@ -363,9 +404,20 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; } +static void tcf_ct_flow_ct_ext_ifidx_update(struct flow_offload *entry) +{ + struct nf_conn_act_ct_ext *act_ct_ext; + + act_ct_ext = nf_conn_act_ct_ext_find(entry->ct); + if (act_ct_ext) { + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); + } +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, - bool tcp) + bool tcp, bool bidirectional) { struct nf_conn_act_ct_ext *act_ct_ext; struct flow_offload *entry; @@ -384,6 +436,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; } + if (bidirectional) + __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags); act_ct_ext = nf_conn_act_ct_ext_find(ct); if (act_ct_ext) { @@ -407,26 +461,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - bool tcp = false; - - if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) || - !test_bit(IPS_ASSURED_BIT, &ct->status)) - return; + bool tcp = false, bidirectional = true; switch (nf_ct_protonum(ct)) { case IPPROTO_TCP: - tcp = true; - if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) + if ((ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY) || + !test_bit(IPS_ASSURED_BIT, &ct->status) || + ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED) return; + + tcp = true; break; case IPPROTO_UDP: + if (!nf_ct_is_confirmed(ct)) + return; + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) + bidirectional = false; break; #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: { struct nf_conntrack_tuple *tuple; - if (ct->status & IPS_NAT_MASK) + if ((ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY) || + !test_bit(IPS_ASSURED_BIT, &ct->status) || + ct->status & IPS_NAT_MASK) return; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* No support for GRE v1 */ if (tuple->src.u.gre.key || tuple->dst.u.gre.key) @@ -442,7 +504,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, ct->status & IPS_SEQ_ADJUST) return; - tcf_ct_flow_table_add(ct_ft, ct, tcp); + tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional); } static bool @@ -596,6 +658,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; struct tcphdr *tcph = NULL; + bool force_refresh = false; struct flow_offload *flow; struct nf_conn *ct; u8 dir; @@ -621,15 +684,40 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); ct = flow->ct; + if (dir == FLOW_OFFLOAD_DIR_REPLY && + !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) { + /* Only offload reply direction after connection became + * assured. + */ + if (test_bit(IPS_ASSURED_BIT, &ct->status)) + set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags); + else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags)) + /* If flow_table flow has already been updated to the + * established state, then don't refresh. + */ + return false; + force_refresh = true; + } + if (tcph && (unlikely(tcph->fin || tcph->rst))) { flow_offload_teardown(flow); return false; } - ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED : - IP_CT_ESTABLISHED_REPLY; + if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) + ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? + IP_CT_ESTABLISHED : IP_CT_NEW; + else + ctinfo = IP_CT_ESTABLISHED_REPLY; + + nf_conn_act_ct_ext_fill(skb, ct, ctinfo); + tcf_ct_flow_ct_ext_ifidx_update(flow); + flow_offload_refresh(nf_ft, flow, force_refresh); + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Process this flow in SW to allow promoting to ASSURED */ + return false; + } - flow_offload_refresh(nf_ft, flow); nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); if (nf_ft->flags & NF_FLOWTABLE_COUNTER) @@ -652,7 +740,6 @@ static struct tc_action_ops act_ct_ops; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ - bool labels; }; /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ @@ -695,31 +782,6 @@ drop_ct: return false; } -/* Trim the skb to the length specified by the IP/IPv6 header, - * removing any trailing lower-layer padding. This prepares the skb - * for higher-layer processing that assumes skb->len excludes padding - * (such as nf_ip_checksum). The caller needs to pull the skb to the - * network header, and ensure ip_hdr/ipv6_hdr points to valid data. - */ -static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) -{ - unsigned int len; - - switch (family) { - case NFPROTO_IPV4: - len = ntohs(ip_hdr(skb)->tot_len); - break; - case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); - break; - default: - len = skb->len; - } - - return pskb_trim_rcsum(skb, len); -} - static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) { u8 family = NFPROTO_UNSPEC; @@ -779,6 +841,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, struct nf_conn *ct; int err = 0; bool frag; + u8 proto; u16 mru; /* Previously seen (loopback)? Ignore. */ @@ -793,51 +856,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err || !frag) return err; - skb_get(skb); - mru = tc_skb_cb(skb)->mru; - - if (family == NFPROTO_IPV4) { - enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - local_bh_disable(); - err = ip_defrag(net, skb, user); - local_bh_enable(); - if (err && err != -EINPROGRESS) - return err; - - if (!err) { - *defrag = true; - mru = IPCB(skb)->frag_max_size; - } - } else { /* NFPROTO_IPV6 */ -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - - memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - err = nf_ct_frag6_gather(net, skb, user); - if (err && err != -EINPROGRESS) - goto out_free; - - if (!err) { - *defrag = true; - mru = IP6CB(skb)->frag_max_size; - } -#else - err = -EOPNOTSUPP; - goto out_free; -#endif - } + err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru); + if (err) + return err; - if (err != -EINPROGRESS) - tc_skb_cb(skb)->mru = mru; - skb_clear_hash(skb); - skb->ignore_df = 1; - return err; + *defrag = true; + tc_skb_cb(skb)->mru = mru; -out_free: - kfree_skb(skb); - return err; + return 0; } static void tcf_ct_params_free(struct tcf_ct_params *params) @@ -851,8 +877,13 @@ static void tcf_ct_params_free(struct tcf_ct_params *params) } if (params->ct_ft) tcf_ct_flow_table_put(params->ct_ft); - if (params->tmpl) + if (params->tmpl) { + if (params->put_labels) + nf_connlabels_put(nf_ct_net(params->tmpl)); + nf_ct_put(params->tmpl); + } + kfree(params); } @@ -913,11 +944,13 @@ static int tcf_ct_act_nat(struct sk_buff *skb, action |= BIT(NF_NAT_MANIP_DST); err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit); + if (err != NF_ACCEPT) + return err & NF_VERDICT_MASK; if (action & BIT(NF_NAT_MANIP_SRC)) - tc_skb_cb(skb)->post_ct_snat = 1; + qdisc_skb_cb(skb)->post_ct_snat = 1; if (action & BIT(NF_NAT_MANIP_DST)) - tc_skb_cb(skb)->post_ct_dnat = 1; + qdisc_skb_cb(skb)->post_ct_dnat = 1; return err; #else @@ -944,7 +977,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, p = rcu_dereference_bh(c->params); - retval = READ_ONCE(c->tcf_action); + retval = p->action; commit = p->ct_action & TCA_CT_ACT_COMMIT; clear = p->ct_action & TCA_CT_ACT_CLEAR; tmpl = p->tmpl; @@ -953,7 +986,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); if (clear) { - tc_skb_cb(skb)->post_ct = false; + qdisc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { nf_ct_put(ct); @@ -973,14 +1006,10 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); - if (err == -EINPROGRESS) { - retval = TC_ACT_STOLEN; - goto out_clear; - } if (err) - goto drop; + goto out_frag; - err = tcf_ct_skb_network_trim(skb, family); + err = nf_ct_skb_network_trim(skb, family); if (err) goto drop; @@ -1008,7 +1037,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, state.pf = family; err = nf_conntrack_in(skb, &state); if (err != NF_ACCEPT) - goto out_push; + goto nf_error; } do_nat: @@ -1020,7 +1049,7 @@ do_nat: err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); if (err != NF_ACCEPT) - goto drop; + goto nf_error; if (!nf_ct_is_confirmed(ct) && commit && p->helper && !nfct_help(ct)) { err = __nf_ct_try_assign_helper(ct, p->tmpl, GFP_ATOMIC); @@ -1034,8 +1063,9 @@ do_nat: } if (nf_ct_is_confirmed(ct) ? ((!cached && !skip_add) || add_helper) : commit) { - if (nf_ct_helper(skb, ct, ctinfo, family) != NF_ACCEPT) - goto drop; + err = nf_ct_helper(skb, ct, ctinfo, family); + if (err != NF_ACCEPT) + goto nf_error; } if (commit) { @@ -1043,13 +1073,22 @@ do_nat: tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); if (!nf_ct_is_confirmed(ct)) - nf_conn_act_ct_ext_add(ct); + nf_conn_act_ct_ext_add(skb, ct, ctinfo); /* This will take care of sending queued events * even if the connection is already confirmed. */ - if (nf_conntrack_confirm(skb) != NF_ACCEPT) - goto drop; + err = nf_conntrack_confirm(skb); + if (err != NF_ACCEPT) + goto nf_error; + + /* The ct may be dropped if a clash has been resolved, + * so it's necessary to retrieve it from skb again to + * prevent UAF. + */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + skip_add = true; } if (!skip_add) @@ -1058,16 +1097,36 @@ do_nat: out_push: skb_push_rcsum(skb, nh_ofs); - tc_skb_cb(skb)->post_ct = true; + qdisc_skb_cb(skb)->post_ct = true; tc_skb_cb(skb)->zone = p->zone; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; +out_frag: + if (err != -EINPROGRESS) + tcf_action_inc_drop_qstats(&c->common); + return TC_ACT_CONSUMED; + drop: tcf_action_inc_drop_qstats(&c->common); return TC_ACT_SHOT; + +nf_error: + /* some verdicts store extra data in upper bits, such + * as errno or queue number. + */ + switch (err & NF_VERDICT_MASK) { + case NF_DROP: + goto drop; + case NF_STOLEN: + tcf_action_inc_drop_qstats(&c->common); + return TC_ACT_CONSUMED; + default: + DEBUG_NET_WARN_ON_ONCE(1); + goto drop; + } } static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { @@ -1124,9 +1183,8 @@ static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, range->min_addr.ip = nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); - range->max_addr.ip = max_attr ? - nla_get_in_addr(max_attr) : - range->min_addr.ip; + range->max_addr.ip = + nla_get_in_addr_default(max_attr, range->min_addr.ip); } else if (tb[TCA_CT_NAT_IPV6_MIN]) { struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; @@ -1176,9 +1234,9 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; int err, family, proto, len; + bool put_labels = false; struct nf_conn *tmpl; char *name; @@ -1208,15 +1266,20 @@ static int tcf_ct_fill_params(struct net *net, } if (tb[TCA_CT_LABELS]) { + unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); return -EOPNOTSUPP; } - if (!tn->labels) { + if (nf_connlabels_get(net, n_bits - 1)) { NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); return -EOPNOTSUPP; + } else { + put_labels = true; } + tcf_ct_set_key_val(tb, p->labels, TCA_CT_LABELS, p->labels_mask, TCA_CT_LABELS_MASK, @@ -1250,8 +1313,9 @@ static int tcf_ct_fill_params(struct net *net, err = -EINVAL; goto err; } - family = tb[TCA_CT_HELPER_FAMILY] ? nla_get_u8(tb[TCA_CT_HELPER_FAMILY]) : AF_INET; - proto = tb[TCA_CT_HELPER_PROTO] ? nla_get_u8(tb[TCA_CT_HELPER_PROTO]) : IPPROTO_TCP; + family = nla_get_u8_default(tb[TCA_CT_HELPER_FAMILY], AF_INET); + proto = nla_get_u8_default(tb[TCA_CT_HELPER_PROTO], + IPPROTO_TCP); err = nf_ct_add_helper(tmpl, name, family, proto, p->ct_action & TCA_CT_ACT_NAT, &p->helper); if (err) { @@ -1260,9 +1324,15 @@ static int tcf_ct_fill_params(struct net *net, } } - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); + p->put_labels = put_labels; + + if (p->ct_action & TCA_CT_ACT_COMMIT) + __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; err: + if (put_labels) + nf_connlabels_put(net); + nf_ct_put(p->tmpl); p->tmpl = NULL; return err; @@ -1312,7 +1382,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, res = ACT_P_CREATED; } else { if (bind) - return 0; + return ACT_P_BOUND; if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); @@ -1339,6 +1409,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; + params->action = parm->action; spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); params = rcu_replace_pointer(c->params, params, @@ -1372,8 +1443,8 @@ static void tcf_ct_cleanup(struct tc_action *a) } static int tcf_ct_dump_key_val(struct sk_buff *skb, - void *val, int val_type, - void *mask, int mask_type, + const void *val, int val_type, + const void *mask, int mask_type, int len) { int err; @@ -1394,9 +1465,9 @@ static int tcf_ct_dump_key_val(struct sk_buff *skb, return 0; } -static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) +static int tcf_ct_dump_nat(struct sk_buff *skb, const struct tcf_ct_params *p) { - struct nf_nat_range2 *range = &p->range; + const struct nf_nat_range2 *range = &p->range; if (!(p->ct_action & TCA_CT_ACT_NAT)) return 0; @@ -1434,7 +1505,8 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) return 0; } -static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper) +static int tcf_ct_dump_helper(struct sk_buff *skb, + const struct nf_conntrack_helper *helper) { if (!helper) return 0; @@ -1451,9 +1523,8 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); - struct tcf_ct *c = to_ct(a); - struct tcf_ct_params *p; - + const struct tcf_ct *c = to_ct(a); + const struct tcf_ct_params *p; struct tc_ct opt = { .index = c->tcf_index, .refcnt = refcount_read(&c->tcf_refcnt) - ref, @@ -1461,10 +1532,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, }; struct tcf_t t; - spin_lock_bh(&c->tcf_lock); - p = rcu_dereference_protected(c->params, - lockdep_is_held(&c->tcf_lock)); - opt.action = c->tcf_action; + rcu_read_lock(); + p = rcu_dereference(c->params); + opt.action = p->action; if (tcf_ct_dump_key_val(skb, &p->ct_action, TCA_CT_ACTION, @@ -1509,11 +1579,11 @@ skip_dump: tcf_tm_dump(&t, &c->tcf_tm); if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) goto nla_put_failure; - spin_unlock_bh(&c->tcf_lock); + rcu_read_unlock(); return skb->len; nla_put_failure: - spin_unlock_bh(&c->tcf_lock); + rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } @@ -1534,6 +1604,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, if (bind) { struct flow_action_entry *entry = entry_data; + if (tcf_ct_helper(act)) + return -EOPNOTSUPP; + entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); @@ -1560,35 +1633,17 @@ static struct tc_action_ops act_ct_ops = { .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), }; +MODULE_ALIAS_NET_ACT("ct"); static __net_init int ct_init_net(struct net *net) { - unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - if (nf_connlabels_get(net, n_bits - 1)) { - tn->labels = false; - pr_err("act_ct: Failed to set connlabels length"); - } else { - tn->labels = true; - } - return tc_action_net_init(net, &tn->tn, &act_ct_ops); } static void __net_exit ct_exit_net(struct list_head *net_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - - if (tn->labels) - nf_connlabels_put(net); - } - rtnl_unlock(); - tc_action_net_exit(net_list, act_ct_ops.net_id); } |
