Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c 1369
1 file changed, 1369 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
new file mode 100644
index 000000000000..ad3e3a65d403
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -0,0 +1,1369 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <linux/workqueue.h>
+
+#include "esw/chains.h"
+#include "en/tc_ct.h"
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+
+#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
+#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
+#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
+#define MLX5_CT_STATE_TRK_BIT BIT(2)
+
+#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
+#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
+#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+
+struct mlx5_tc_ct_priv {
+ struct mlx5_eswitch *esw;
+ const struct net_device *netdev;
+ struct idr fte_ids;
+ struct idr tuple_ids;
+ struct rhashtable zone_ht;
+ struct mlx5_flow_table *ct;
+ struct mlx5_flow_table *ct_nat;
+ struct mlx5_flow_table *post_ct;
+ struct mutex control_lock; /* guards parallel adds/dels */
+};
+
+struct mlx5_ct_flow {
+ struct mlx5_esw_flow_attr pre_ct_attr;
+ struct mlx5_esw_flow_attr post_ct_attr;
+ struct mlx5_flow_handle *pre_ct_rule;
+ struct mlx5_flow_handle *post_ct_rule;
+ struct mlx5_ct_ft *ft;
+ u32 fte_id;
+ u32 chain_mapping;
+};
+
+struct mlx5_ct_zone_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_esw_flow_attr attr;
+ int tupleid;
+ bool nat;
+};
+
+struct mlx5_ct_ft {
+ struct rhash_head node;
+ u16 zone;
+ refcount_t refcount;
+ struct nf_flowtable *nf_ft;
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct rhashtable ct_entries_ht;
+ struct list_head ct_entries_list;
+};
+
+struct mlx5_ct_entry {
+ struct list_head list;
+ u16 zone;
+ struct rhash_head node;
+ struct flow_rule *flow_rule;
+ struct mlx5_fc *counter;
+ unsigned long lastuse;
+ unsigned long cookie;
+ unsigned long restore_cookie;
+ struct mlx5_ct_zone_rule zone_rules[2];
+};
+
+static const struct rhashtable_params cts_ht_params = {
+ .head_offset = offsetof(struct mlx5_ct_entry, node),
+ .key_offset = offsetof(struct mlx5_ct_entry, cookie),
+ .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
+ .automatic_shrinking = true,
+ .min_size = 16 * 1024,
+};
+
+static const struct rhashtable_params zone_params = {
+ .head_offset = offsetof(struct mlx5_ct_ft, node),
+ .key_offset = offsetof(struct mlx5_ct_ft, zone),
+ .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
+ .automatic_shrinking = true,
+};
+
+static struct mlx5_tc_ct_priv *
+mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_rep_uplink_priv *uplink_priv;
+ struct mlx5e_rep_priv *uplink_rpriv;
+
+ uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+ uplink_priv = &uplink_rpriv->uplink_priv;
+ return uplink_priv->ct_priv;
+}
+
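+/* Translate the conntrack tuple carried by @rule (ethertype, ip_proto,
+ * IPv4/IPv6 addresses, TCP/UDP ports and TCP flags) into mlx5 match
+ * criteria and values in @spec.
+ */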
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
+ struct flow_rule *rule)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(match.mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(match.key->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ match.mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ match.key->ip_proto);
+
+ ip_proto = match.key->ip_proto;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs match;
+
+ flow_rule_match_ipv4_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &match.key->src, sizeof(match.key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_ipv6_addrs(rule, &match);
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.mask->src, sizeof(match.mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &match.key->src, sizeof(match.key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.mask->dst, sizeof(match.mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &match.key->dst, sizeof(match.key->dst));
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports match;
+
+ flow_rule_match_ports(rule, &match);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(match.key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(match.mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(match.key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(match.mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(match.key->dst));
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+ struct flow_match_tcp match;
+
+ flow_rule_match_tcp(rule, &match);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(match.mask->flags));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
+ ntohs(match.key->flags));
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
+ struct mlx5_eswitch *esw = ct_priv->esw;
+
+ ct_dbg("Deleting ct entry rule in zone %d", entry->zone);
+
+ mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
+ mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr);
+ idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid);
+}
+
+static void
+mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_entry *entry)
+{
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
+}
+
+static struct flow_action_entry *
+mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct flow_action_entry *act;
+ int i;
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT_METADATA)
+ return act;
+ }
+
+ return NULL;
+}
+
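+/* Emit mod header set actions that write the ct_state bits, ct mark,
+ * the lower 32 bits of the ct label and the tuple id into the registers
+ * mapped by CTSTATE_TO_REG, MARK_TO_REG, LABELS_TO_REG and TUPLEID_TO_REG.
+ */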
+static int
+mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ u8 ct_state,
+ u32 mark,
+ u32 label,
+ u32 tupleid)
+{
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ int err;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ CTSTATE_TO_REG, ct_state);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ MARK_TO_REG, mark);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ LABELS_TO_REG, label);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
+ TUPLEID_TO_REG, tupleid);
+ if (err)
+ return err;
+
+ return 0;
+}
+
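+/* Translate a single pedit mangle action (a NAT rewrite of an IPv4/IPv6
+ * address or a TCP/UDP port) into one mlx5 set_action_in at @modact.
+ */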
+static int
+mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
+ char *modact)
+{
+ u32 offset = act->mangle.offset, field;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct iphdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
+ else if (offset == offsetof(struct iphdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
+ MLX5_SET(set_action_in, modact, length, 0);
+ if (offset == offsetof(struct ipv6hdr, saddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, saddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
+ else if (offset == offsetof(struct ipv6hdr, daddr))
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
+ else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
+ field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct tcphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
+ else if (offset == offsetof(struct tcphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ MLX5_SET(set_action_in, modact, length, 16);
+ if (offset == offsetof(struct udphdr, source))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
+ else if (offset == offsetof(struct udphdr, dest))
+ field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
+ else
+ return -EOPNOTSUPP;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+ MLX5_SET(set_action_in, modact, offset, 0);
+ MLX5_SET(set_action_in, modact, field, field);
+ MLX5_SET(set_action_in, modact, data, act->mangle.val);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ struct flow_action *flow_action = &flow_rule->action;
+ struct mlx5_core_dev *mdev = ct_priv->esw->dev;
+ struct flow_action_entry *act;
+ size_t action_size;
+ char *modact;
+ int err, i;
+
+ action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+
+ flow_action_for_each(i, act, flow_action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE: {
+ err = alloc_mod_hdr_actions(mdev,
+ MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts);
+ if (err)
+ return err;
+
+ modact = mod_acts->actions +
+ mod_acts->num_actions * action_size;
+
+ err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
+ if (err)
+ return err;
+
+ mod_acts->num_actions++;
+ }
+ break;
+
+ case FLOW_ACTION_CT_METADATA:
+ /* Handled earlier */
+ continue;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
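+/* Build the modify header for a ct entry rule: optional NAT rewrites
+ * followed by the register sets for ct_state, mark, label and tuple id.
+ * Only the lower 32 bits of the ct label can be offloaded.
+ */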
+static int
+mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_esw_flow_attr *attr,
+ struct flow_rule *flow_rule,
+ u32 tupleid,
+ bool nat)
+{
+ struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct flow_action_entry *meta;
+ int err;
+
+ meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta)
+ return -EOPNOTSUPP;
+
+ if (meta->ct_metadata.labels[1] ||
+ meta->ct_metadata.labels[2] ||
+ meta->ct_metadata.labels[3]) {
+ ct_dbg("Failed to offload ct entry due to unsupported label");
+ return -EOPNOTSUPP;
+ }
+
+ if (nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
+ &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
+
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
+ (MLX5_CT_STATE_ESTABLISHED_BIT |
+ MLX5_CT_STATE_TRK_BIT),
+ meta->ct_metadata.mark,
+ meta->ct_metadata.labels[0],
+ tupleid);
+ if (err)
+ goto err_mapping;
+
+ mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts.num_actions,
+ mod_acts.actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ goto err_mapping;
+ }
+ attr->modify_hdr = mod_hdr;
+
+ dealloc_mod_hdr_actions(&mod_acts);
+ return 0;
+
+err_mapping:
+ dealloc_mod_hdr_actions(&mod_acts);
+ return err;
+}
+
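+/* Offload a single ct entry rule: allocate a tuple id, build the modify
+ * header and insert a rule matching tuple + zone in the ct (or, when
+ * @nat is set, ct_nat) table that forwards to the post_ct table.
+ */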
+static int
+mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry,
+ bool nat)
+{
+ struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
+ struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_flow_spec *spec = NULL;
+ u32 tupleid = 1;
+ int err;
+
+ zone_rule->nat = nat;
+
+ spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Get a unique tuple id */
+ err = idr_alloc_u32(&ct_priv->tuple_ids, zone_rule, &tupleid,
+ TUPLE_ID_MAX, GFP_KERNEL);
+ if (err) {
+ netdev_warn(ct_priv->netdev,
+ "Failed to allocate tuple id, err: %d\n", err);
+ goto err_idr_alloc;
+ }
+ zone_rule->tupleid = tupleid;
+
+ err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
+ tupleid, nat);
+ if (err) {
+ ct_dbg("Failed to create ct entry mod hdr");
+ goto err_mod_hdr;
+ }
+
+ attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
+ MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = 0;
+ attr->dest_ft = ct_priv->post_ct;
+ attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
+ attr->outer_match_level = MLX5_MATCH_L4;
+ attr->counter = entry->counter;
+ attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+
+ mlx5_tc_ct_set_tuple_match(spec, flow_rule);
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ entry->zone & MLX5_CT_ZONE_MASK,
+ MLX5_CT_ZONE_MASK);
+
+ zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ if (IS_ERR(zone_rule->rule)) {
+ err = PTR_ERR(zone_rule->rule);
+ ct_dbg("Failed to add ct entry rule, nat: %d", nat);
+ goto err_rule;
+ }
+
+ kfree(spec);
+ ct_dbg("Offloaded ct entry rule in zone %d", entry->zone);
+
+ return 0;
+
+err_rule:
+ mlx5_modify_header_dealloc(esw->dev, attr->modify_hdr);
+err_mod_hdr:
+ idr_remove(&ct_priv->tuple_ids, zone_rule->tupleid);
+err_idr_alloc:
+ kfree(spec);
+ return err;
+}
+
+static int
+mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
+ struct flow_rule *flow_rule,
+ struct mlx5_ct_entry *entry)
+{
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ int err;
+
+ entry->counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(entry->counter)) {
+ err = PTR_ERR(entry->counter);
+ ct_dbg("Failed to create counter for ct entry");
+ return err;
+ }
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false);
+ if (err)
+ goto err_orig;
+
+ err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true);
+ if (err)
+ goto err_nat;
+
+ return 0;
+
+err_nat:
+ mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+err_orig:
+ mlx5_fc_destroy(esw->dev, entry->counter);
+ return err;
+}
+
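+/* FLOW_CLS_REPLACE from the nf flowtable: offload an established
+ * connection as a pair of rules (plain and NAT) sharing one counter.
+ */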
+static int
+mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
+ struct flow_action_entry *meta_action;
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+ int err;
+
+ meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
+ if (!meta_action)
+ return -EOPNOTSUPP;
+
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
+ cts_ht_params);
+ if (entry)
+ return 0;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->zone = ft->zone;
+ entry->flow_rule = flow_rule;
+ entry->cookie = flow->cookie;
+ entry->restore_cookie = meta_action->ct_metadata.cookie;
+
+ err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry);
+ if (err)
+ goto err_rules;
+
+ err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_insert;
+
+ list_add(&entry->list, &ft->ct_entries_list);
+
+ return 0;
+
+err_insert:
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+err_rules:
+ kfree(entry);
+ netdev_warn(ct_priv->netdev,
+ "Failed to offload ct entry, err: %d\n", err);
+ return err;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *flow)
+{
+ unsigned long cookie = flow->cookie;
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
+ cts_ht_params);
+ if (!entry)
+ return -ENOENT;
+
+ mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry);
+ WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params));
+ list_del(&entry->list);
+ kfree(entry);
+
+ return 0;
+}
+
+static int
+mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
+ struct flow_cls_offload *f)
+{
+ unsigned long cookie = f->cookie;
+ struct mlx5_ct_entry *entry;
+ u64 lastuse, packets, bytes;
+
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
+ cts_ht_params);
+ if (!entry)
+ return -ENOENT;
+
+ mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
+ flow_stats_update(&f->stats, bytes, packets, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+
+ return 0;
+}
+
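+/* Callback registered with the zone's nf flowtable; dispatches
+ * add/del/stats requests for established connections of that zone.
+ */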
+static int
+mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct flow_cls_offload *f = type_data;
+ struct mlx5_ct_ft *ft = cb_priv;
+
+ if (type != TC_SETUP_CLSFLOWER)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case FLOW_CLS_REPLACE:
+ return mlx5_tc_ct_block_flow_offload_add(ft, f);
+ case FLOW_CLS_DESTROY:
+ return mlx5_tc_ct_block_flow_offload_del(ft, f);
+ case FLOW_CLS_STATS:
+ return mlx5_tc_ct_block_flow_offload_stats(ft, f);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
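+/* Translate tc flower ct_state/ct_zone/ct_mark/ct_label matches into
+ * matches on the registers the CT tables write. Only trk, est and new
+ * may appear in the ct_state mask, and matching on +new is rejected.
+ */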
+int
+mlx5_tc_ct_parse_match(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ struct flow_dissector_key_ct *mask, *key;
+ bool trk, est, untrk, unest, new;
+ u32 ctstate = 0, ctstate_mask = 0;
+ u16 ct_state_on, ct_state_off;
+ u16 ct_state, ct_state_mask;
+ struct flow_match_ct match;
+
+ if (!flow_rule_match_key(f->rule, FLOW_DISSECTOR_KEY_CT))
+ return 0;
+
+ if (!ct_priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct matching isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ flow_rule_match_ct(f->rule, &match);
+
+ key = match.key;
+ mask = match.mask;
+
+ ct_state = key->ct_state;
+ ct_state_mask = mask->ct_state;
+
+ if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only ct_state trk, est and new are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_labels[1] || mask->ct_labels[2] || mask->ct_labels[3]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "only lower 32bits of ct_labels are supported for offload");
+ return -EOPNOTSUPP;
+ }
+
+ ct_state_on = ct_state & ct_state_mask;
+ ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
+ trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
+ est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+ untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
+
+ ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+ ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
+ ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
+
+ if (new) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "matching on ct_state +new isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mask->ct_zone)
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+ key->ct_zone, MLX5_CT_ZONE_MASK);
+ if (ctstate_mask)
+ mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
+ ctstate, ctstate_mask);
+ if (mask->ct_mark)
+ mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
+ key->ct_mark, mask->ct_mark);
+ if (mask->ct_labels[0])
+ mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG,
+ key->ct_labels[0],
+ mask->ct_labels[0]);
+
+ return 0;
+}
+
+int
+mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
+ struct mlx5_esw_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+
+ if (!ct_priv) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "offload of ct action isn't available");
+ return -EOPNOTSUPP;
+ }
+
+ attr->ct_attr.zone = act->ct.zone;
+ attr->ct_attr.ct_action = act->ct.action;
+ attr->ct_attr.nf_ft = act->ct.flow_table;
+
+ return 0;
+}
+
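+/* Get (or create) the per-zone ct flow table state and register
+ * mlx5_tc_ct_block_flow_offload() as the offload callback of the
+ * zone's nf flowtable.
+ */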
+static struct mlx5_ct_ft *
+mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
+ struct nf_flowtable *nf_ft)
+{
+ struct mlx5_ct_ft *ft;
+ int err;
+
+ ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
+ if (ft) {
+ refcount_inc(&ft->refcount);
+ return ft;
+ }
+
+ ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+ if (!ft)
+ return ERR_PTR(-ENOMEM);
+
+ ft->zone = zone;
+ ft->nf_ft = nf_ft;
+ ft->ct_priv = ct_priv;
+ INIT_LIST_HEAD(&ft->ct_entries_list);
+ refcount_set(&ft->refcount, 1);
+
+ err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
+ if (err)
+ goto err_init;
+
+ err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
+ zone_params);
+ if (err)
+ goto err_insert;
+
+ err = nf_flow_table_offload_add_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ if (err)
+ goto err_add_cb;
+
+ return ft;
+
+err_add_cb:
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+err_insert:
+ rhashtable_destroy(&ft->ct_entries_ht);
+err_init:
+ kfree(ft);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_tc_ct_flush_ft(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ struct mlx5_ct_entry *entry;
+
+ list_for_each_entry(entry, &ft->ct_entries_list, list)
+ mlx5_tc_ct_entry_del_rules(ft->ct_priv, entry);
+}
+
+static void
+mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
+{
+ if (!refcount_dec_and_test(&ft->refcount))
+ return;
+
+ nf_flow_table_offload_del_cb(ft->nf_ft,
+ mlx5_tc_ct_block_flow_offload, ft);
+ mlx5_tc_ct_flush_ft(ct_priv, ft);
+ rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
+ rhashtable_destroy(&ft->ct_entries_ht);
+ kfree(ft);
+}
+
+/* We translate the tc filter with CT action to the following HW model:
+ *
+ * +-------------------+ +--------------------+ +--------------+
+ * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +----->
+ * + original match + | + tuple + zone match + | + fte_id match + |
+ * +-------------------+ | +--------------------+ | +--------------+ |
+ * v v v
+ * set chain miss mapping set mark original
+ * set fte_id set label filter
+ * set zone set established actions
+ * set tunnel_id do nat (if needed)
+ * do decap
+ */
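+
+/* Illustrative example (not part of the driver; device names, zone and
+ * chain numbers are placeholders) of tc rules that exercise this path:
+ *
+ *   tc filter add dev $REP ingress prio 1 chain 0 proto ip flower \
+ *     ct_state -trk action ct zone 5 pipe action goto chain 1
+ *   tc filter add dev $REP ingress prio 1 chain 1 proto ip flower \
+ *     ct_state +trk+est ct_zone 5 action mirred egress redirect dev $REP2
+ *
+ * The first rule (with the ct action) is offloaded through the
+ * pre_ct -> CT -> post_ct pipeline above; the ct_state match of the
+ * second rule is what mlx5_tc_ct_parse_match() translates.
+ */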
+static int
+__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5_flow_handle **flow_rule)
+{
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+ struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5_flow_spec *post_ct_spec = NULL;
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_esw_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_ct_flow *ct_flow;
+ int chain_mapping = 0, err;
+ struct mlx5_ct_ft *ft;
+ u32 fte_id = 1;
+
+ post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!post_ct_spec || !ct_flow) {
+ kfree(post_ct_spec);
+ kfree(ct_flow);
+ return -ENOMEM;
+ }
+
+ /* Register for CT established events */
+ ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
+ attr->ct_attr.nf_ft);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ ct_dbg("Failed to register to ft callback");
+ goto err_ft;
+ }
+ ct_flow->ft = ft;
+
+ err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
+ MLX5_FTE_ID_MAX, GFP_KERNEL);
+ if (err) {
+ netdev_warn(priv->netdev,
+ "Failed to allocate fte id, err: %d\n", err);
+ goto err_idr;
+ }
+ ct_flow->fte_id = fte_id;
+
+ /* Base esw attributes of both rules on original rule attribute */
+ pre_ct_attr = &ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, sizeof(*attr));
+ memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));
+
+ /* Modify the original rule's action to fwd and modify, leave decap */
+ pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Write chain miss tag for miss in ct table as we
+ * don't go through all prios of this chain as normal tc rules
+ * miss.
+ */
+ err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
+ &chain_mapping);
+ if (err) {
+ ct_dbg("Failed to get chain register mapping for chain");
+ goto err_get_chain;
+ }
+ ct_flow->chain_mapping = chain_mapping;
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ CHAIN_TO_REG, chain_mapping);
+ if (err) {
+ ct_dbg("Failed to set chain register mapping");
+ goto err_mapping;
+ }
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
+ attr->ct_attr.zone &
+ MLX5_CT_ZONE_MASK);
+ if (err) {
+ ct_dbg("Failed to set zone register mapping");
+ goto err_mapping;
+ }
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ FTEID_TO_REG, fte_id);
+ if (err) {
+ ct_dbg("Failed to set fte_id register mapping");
+ goto err_mapping;
+ }
+
+ /* If original flow is decap, we do it before going into ct table
+ * so add a rewrite for the tunnel match_id.
+ */
+ if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ attr->chain == 0) {
+ u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
+
+ err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
+ TUNNEL_TO_REG,
+ tun_id);
+ if (err) {
+ ct_dbg("Failed to set tunnel register mapping");
+ goto err_mapping;
+ }
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(esw->dev,
+ MLX5_FLOW_NAMESPACE_FDB,
+ pre_mod_acts.num_actions,
+ pre_mod_acts.actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to create pre ct mod hdr");
+ goto err_mapping;
+ }
+ pre_ct_attr->modify_hdr = mod_hdr;
+
+ /* The post_ct rule matches on fte_id and executes the original
+ * rule's tc action.
+ */
+ mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
+ fte_id, MLX5_FTE_ID_MASK);
+
+ /* Put post_ct rule on post_ct fdb */
+ ct_flow->post_ct_attr.chain = 0;
+ ct_flow->post_ct_attr.prio = 0;
+ ct_flow->post_ct_attr.fdb = ct_priv->post_ct;
+
+ ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
+ ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
+ ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+ rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
+ &ct_flow->post_ct_attr);
+ ct_flow->post_ct_rule = rule;
+ if (IS_ERR(ct_flow->post_ct_rule)) {
+ err = PTR_ERR(ct_flow->post_ct_rule);
+ ct_dbg("Failed to add post ct rule");
+ goto err_insert_post_ct;
+ }
+
+ /* Point the original (pre_ct) rule at the ct table */
+ pre_ct_attr->dest_chain = 0;
+ pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+ ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
+ orig_spec,
+ pre_ct_attr);
+ if (IS_ERR(ct_flow->pre_ct_rule)) {
+ err = PTR_ERR(ct_flow->pre_ct_rule);
+ ct_dbg("Failed to add pre ct rule");
+ goto err_insert_orig;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ *flow_rule = ct_flow->post_ct_rule;
+ dealloc_mod_hdr_actions(&pre_mod_acts);
+ kfree(post_ct_spec);
+
+ return 0;
+
+err_insert_orig:
+ mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
+ &ct_flow->post_ct_attr);
+err_insert_post_ct:
+ mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
+err_mapping:
+ dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+err_get_chain:
+ idr_remove(&ct_priv->fte_ids, fte_id);
+err_idr:
+ mlx5_tc_ct_del_ft_cb(ct_priv, ft);
+err_ft:
+ kfree(post_ct_spec);
+ kfree(ct_flow);
+ netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
+ return err;
+}
+
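+/* ct clear action: no pre_ct/post_ct split is needed; just prepend mod
+ * header actions that zero the CT registers (ct_state, mark, label and
+ * tuple id) to the original rule.
+ */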
+static int
+__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *orig_spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts,
+ struct mlx5_flow_handle **flow_rule)
+{
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ struct mlx5_eswitch *esw = ct_priv->esw;
+ struct mlx5_esw_flow_attr *pre_ct_attr;
+ struct mlx5_modify_hdr *mod_hdr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_ct_flow *ct_flow;
+ int err;
+
+ ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
+ if (!ct_flow)
+ return -ENOMEM;
+
+ /* Base esw attributes on original rule attribute */
+ pre_ct_attr = &ct_flow->pre_ct_attr;
+ memcpy(pre_ct_attr, attr, sizeof(*attr));
+
+ err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
+ if (err) {
+ ct_dbg("Failed to set register for ct clear");
+ goto err_set_registers;
+ }
+
+ mod_hdr = mlx5_modify_header_alloc(esw->dev,
+ MLX5_FLOW_NAMESPACE_FDB,
+ mod_acts->num_actions,
+ mod_acts->actions);
+ if (IS_ERR(mod_hdr)) {
+ err = PTR_ERR(mod_hdr);
+ ct_dbg("Failed to add create ct clear mod hdr");
+ goto err_set_registers;
+ }
+
+ dealloc_mod_hdr_actions(mod_acts);
+ pre_ct_attr->modify_hdr = mod_hdr;
+ pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ ct_dbg("Failed to add ct clear rule");
+ goto err_insert;
+ }
+
+ attr->ct_attr.ct_flow = ct_flow;
+ ct_flow->pre_ct_rule = rule;
+ *flow_rule = rule;
+
+ return 0;
+
+err_insert:
+ mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
+err_set_registers:
+ netdev_warn(priv->netdev,
+ "Failed to offload ct clear flow, err %d\n", err);
+ return err;
+}
+
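+/* Entry point from en_tc for a tc flow carrying a ct action: offload it
+ * either as a ct clear rule or as the pre_ct/post_ct pipeline, and
+ * return the rule handle the caller should track.
+ */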
+struct mlx5_flow_handle *
+mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ struct mlx5_flow_handle *rule;
+ int err;
+
+ if (!ct_priv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&ct_priv->control_lock);
+ if (clear_action)
+ err = __mlx5_tc_ct_flow_offload_clear(priv, flow, spec, attr,
+ mod_hdr_acts, &rule);
+ else
+ err = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr,
+ &rule);
+ mutex_unlock(&ct_priv->control_lock);
+ if (err)
+ return ERR_PTR(err);
+
+ return rule;
+}
+
+static void
+__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_ct_flow *ct_flow)
+{
+ struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
+ struct mlx5_eswitch *esw = ct_priv->esw;
+
+ mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
+ pre_ct_attr);
+ mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);
+
+ if (ct_flow->post_ct_rule) {
+ mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
+ &ct_flow->post_ct_attr);
+ mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
+ idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
+ }
+
+ kfree(ct_flow);
+}
+
+void
+mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
+ struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
+
+ /* We may also be called on error to clean up after parsing,
+ * in which case there is nothing to undo yet.
+ */
+ if (!ct_flow)
+ return;
+
+ mutex_lock(&ct_priv->control_lock);
+ __mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
+ mutex_unlock(&ct_priv->control_lock);
+}
+
+static int
+mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
+ const char **err_msg)
+{
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ /* cannot restore chain ID on HW miss */
+
+ *err_msg = "tc skb extension missing";
+ return -EOPNOTSUPP;
+#endif
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
+ *err_msg = "firmware level support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
+ /* vlan workaround should be avoided for multi chain rules.
+ * This is just a sanity check as pop vlan action should
+ * be supported by any FW that supports ignore_flow_level
+ */
+
+ *err_msg = "firmware vlan actions support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
+ fdb_modify_header_fwd_to_table)) {
+ /* CT always writes to registers, which are mod header actions.
+ * Therefore, mod header and goto are required.
+ */
+
+ *err_msg = "firmware fwd and modify support is missing";
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+ *err_msg = "register loopback isn't supported";
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void
+mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
+{
+ if (msg)
+ netdev_warn(rpriv->netdev,
+ "tc ct offload not supported, %s, err: %d\n",
+ msg, err);
+ else
+ netdev_warn(rpriv->netdev,
+ "tc ct offload not supported, err: %d\n",
+ err);
+}
+
+int
+mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5_eswitch *esw;
+ struct mlx5e_priv *priv;
+ const char *msg;
+ int err;
+
+ rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+ priv = netdev_priv(rpriv->netdev);
+ esw = priv->mdev->priv.eswitch;
+
+ err = mlx5_tc_ct_init_check_support(esw, &msg);
+ if (err) {
+ mlx5_tc_ct_init_err(rpriv, msg, err);
+ goto err_support;
+ }
+
+ ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
+ if (!ct_priv) {
+ mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
+ goto err_alloc;
+ }
+
+ ct_priv->esw = esw;
+ ct_priv->netdev = rpriv->netdev;
+ ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
+ if (IS_ERR(ct_priv->ct)) {
+ err = PTR_ERR(ct_priv->ct);
+ mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
+ goto err_ct_tbl;
+ }
+
+ ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
+ if (IS_ERR(ct_priv->ct_nat)) {
+ err = PTR_ERR(ct_priv->ct_nat);
+ mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
+ err);
+ goto err_ct_nat_tbl;
+ }
+
+ ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
+ if (IS_ERR(ct_priv->post_ct)) {
+ err = PTR_ERR(ct_priv->post_ct);
+ mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
+ err);
+ goto err_post_ct_tbl;
+ }
+
+ idr_init(&ct_priv->fte_ids);
+ idr_init(&ct_priv->tuple_ids);
+ mutex_init(&ct_priv->control_lock);
+ rhashtable_init(&ct_priv->zone_ht, &zone_params);
+
+ /* Done, set ct_priv so callers know CT offload is initialized */
+ uplink_priv->ct_priv = ct_priv;
+
+ return 0;
+
+err_post_ct_tbl:
+ mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
+err_ct_nat_tbl:
+ mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
+err_ct_tbl:
+ kfree(ct_priv);
+err_alloc:
+err_support:
+
+ return 0;
+}
+
+void
+mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
+{
+ struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
+
+ if (!ct_priv)
+ return;
+
+ mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
+ mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
+ mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
+
+ rhashtable_destroy(&ct_priv->zone_ht);
+ mutex_destroy(&ct_priv->control_lock);
+ idr_destroy(&ct_priv->tuple_ids);
+ idr_destroy(&ct_priv->fte_ids);
+ kfree(ct_priv);
+
+ uplink_priv->ct_priv = NULL;
+}
+
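+/* On a miss to software, map the tuple id received with the packet back
+ * to its offloaded ct entry and restore the conntrack info on the skb.
+ */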
+bool
+mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
+ struct sk_buff *skb, u32 tupleid)
+{
+ struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
+ struct mlx5_ct_zone_rule *zone_rule;
+ struct mlx5_ct_entry *entry;
+
+ if (!ct_priv || !tupleid)
+ return true;
+
+ zone_rule = idr_find(&ct_priv->tuple_ids, tupleid);
+ if (!zone_rule)
+ return false;
+
+ entry = container_of(zone_rule, struct mlx5_ct_entry,
+ zone_rules[zone_rule->nat]);
+ tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+
+ return true;
+}