summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS3
-rw-r--r--include/linux/netfilter/nfnetlink.h1
-rw-r--r--include/net/netfilter/nf_tables.h21
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h27
-rw-r--r--include/uapi/linux/netfilter/nfnetlink.h12
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/nf_conntrack_expect.c8
-rw-r--r--net/netfilter/nf_conntrack_sip.c12
-rw-r--r--net/netfilter/nf_tables_api.c89
-rw-r--r--net/netfilter/nfnetlink.c90
-rw-r--r--net/netfilter/nft_ct.c195
-rw-r--r--net/netfilter/nft_exthdr.c139
-rw-r--r--net/netfilter/nft_set_bitmap.c314
-rw-r--r--net/netfilter/nft_set_hash.c16
-rw-r--r--net/netfilter/nft_set_rbtree.c16
16 files changed, 832 insertions, 122 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 2a86fb06a134..62b0307a7ca4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8587,9 +8587,8 @@ F: Documentation/networking/s2io.txt
F: Documentation/networking/vxge.txt
F: drivers/net/ethernet/neterion/
-NETFILTER ({IP,IP6,ARP,EB,NF}TABLES)
+NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
-M: Patrick McHardy <kaber@trash.net>
M: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1d82dd5e9a08..1b49209dd5c7 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -28,6 +28,7 @@ struct nfnetlink_subsystem {
const struct nfnl_callback *cb; /* callback for individual types */
int (*commit)(struct net *net, struct sk_buff *skb);
int (*abort)(struct net *net, struct sk_buff *skb);
+ bool (*valid_genid)(struct net *net, u32 genid);
};
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 7dfdb517f0be..ac84686aaafb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -203,6 +203,7 @@ struct nft_set_elem {
struct nft_set;
struct nft_set_iter {
u8 genmask;
+ bool flush;
unsigned int count;
unsigned int skip;
int err;
@@ -243,11 +244,13 @@ enum nft_set_class {
* characteristics
*
* @size: required memory
- * @class: lookup performance class
+ * @lookup: lookup performance class
+ * @space: memory class
*/
struct nft_set_estimate {
unsigned int size;
- enum nft_set_class class;
+ enum nft_set_class lookup;
+ enum nft_set_class space;
};
struct nft_set_ext;
@@ -260,7 +263,7 @@ struct nft_expr;
* @insert: insert new element into set
* @activate: activate new element in the next generation
* @deactivate: lookup for element and deactivate it in the next generation
- * @deactivate_one: deactivate element in the next generation
+ * @flush: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
* @privsize: function to return size of set private data
@@ -295,10 +298,11 @@ struct nft_set_ops {
void * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
- bool (*deactivate_one)(const struct net *net,
- const struct nft_set *set,
- void *priv);
- void (*remove)(const struct nft_set *set,
+ bool (*flush)(const struct net *net,
+ const struct nft_set *set,
+ void *priv);
+ void (*remove)(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
@@ -1198,10 +1202,13 @@ struct nft_trans {
struct nft_trans_rule {
struct nft_rule *rule;
+ u32 rule_id;
};
#define nft_trans_rule(trans) \
(((struct nft_trans_rule *)trans->data)->rule)
+#define nft_trans_rule_id(trans) \
+ (((struct nft_trans_rule *)trans->data)->rule_id)
struct nft_trans_set {
struct nft_set *set;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 7b730cab99bd..05215d30fe5c 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -207,6 +207,7 @@ enum nft_chain_attributes {
* @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
* @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
* @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
+ * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
*/
enum nft_rule_attributes {
NFTA_RULE_UNSPEC,
@@ -218,6 +219,7 @@ enum nft_rule_attributes {
NFTA_RULE_POSITION,
NFTA_RULE_USERDATA,
NFTA_RULE_PAD,
+ NFTA_RULE_ID,
__NFTA_RULE_MAX
};
#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
@@ -704,13 +706,32 @@ enum nft_payload_attributes {
};
#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
+enum nft_exthdr_flags {
+ NFT_EXTHDR_F_PRESENT = (1 << 0),
+};
+
+/**
+ * enum nft_exthdr_op - nf_tables match options
+ *
+ * @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers
+ * @NFT_EXTHDR_OP_TCP: match against tcp options
+ */
+enum nft_exthdr_op {
+ NFT_EXTHDR_OP_IPV6,
+ NFT_EXTHDR_OP_TCPOPT,
+ __NFT_EXTHDR_OP_MAX
+};
+#define NFT_EXTHDR_OP_MAX (__NFT_EXTHDR_OP_MAX - 1)
+
/**
- * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
+ * enum nft_exthdr_attributes - nf_tables extension header expression netlink attributes
*
* @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
* @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
* @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
* @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
+ * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U8)
*/
enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -718,6 +739,8 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_TYPE,
NFTA_EXTHDR_OFFSET,
NFTA_EXTHDR_LEN,
+ NFTA_EXTHDR_FLAGS,
+ NFTA_EXTHDR_OP,
__NFTA_EXTHDR_MAX
};
#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)
@@ -864,6 +887,7 @@ enum nft_rt_attributes {
* @NFT_CT_PKTS: conntrack packets
* @NFT_CT_BYTES: conntrack bytes
* @NFT_CT_AVGPKT: conntrack average bytes per packet
+ * @NFT_CT_ZONE: conntrack zone
*/
enum nft_ct_keys {
NFT_CT_STATE,
@@ -883,6 +907,7 @@ enum nft_ct_keys {
NFT_CT_PKTS,
NFT_CT_BYTES,
NFT_CT_AVGPKT,
+ NFT_CT_ZONE,
};
/**
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4bb8cb7730e7..a09906a30d77 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -65,4 +65,16 @@ struct nfgenmsg {
#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE
#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1
+/**
+ * enum nfnl_batch_attributes - nfnetlink batch netlink attributes
+ *
+ * @NFNL_BATCH_GENID: generation ID for this changeset (NLA_U32)
+ */
+enum nfnl_batch_attributes {
+ NFNL_BATCH_UNSPEC,
+ NFNL_BATCH_GENID,
+ __NFNL_BATCH_MAX
+};
+#define NFNL_BATCH_MAX (__NFNL_BATCH_MAX - 1)
+
#endif /* _UAPI_NFNETLINK_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dfbe9deeb8c4..9b28864cc36a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -467,10 +467,10 @@ config NF_TABLES_NETDEV
This option enables support for the "netdev" table.
config NFT_EXTHDR
- tristate "Netfilter nf_tables IPv6 exthdr module"
+ tristate "Netfilter nf_tables exthdr module"
help
This option adds the "exthdr" expression that you can use to match
- IPv6 extension headers.
+ IPv6 extension headers and tcp options.
config NFT_META
tristate "Netfilter nf_tables meta module"
@@ -509,6 +509,12 @@ config NFT_SET_HASH
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
+config NFT_SET_BITMAP
+ tristate "Netfilter nf_tables bitmap set module"
+ help
+ This option adds the "bitmap" set type that is used to build sets
+ whose keys are smaller or equal to 16 bits.
+
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6b3034f12661..c9b78e7b342f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
+obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index f8dbacf66795..e19a69787d99 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -353,7 +353,7 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
-static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
@@ -380,7 +380,6 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
add_timer(&exp->timeout);
NF_CT_STAT_INC(net, expect_create);
- return 0;
}
/* Race with expectations being used means we could have none to find; OK. */
@@ -464,9 +463,8 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
if (ret <= 0)
goto out;
- ret = nf_ct_expect_insert(expect);
- if (ret < 0)
- goto out;
+ nf_ct_expect_insert(expect);
+
spin_unlock_bh(&nf_conntrack_expect_lock);
nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
return ret;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c3fc14e021ec..24174c520239 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -809,13 +809,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
exp->tuple.dst.protonum != proto ||
exp->tuple.dst.u.udp.port != port)
continue;
- if (!del_timer(&exp->timeout))
- continue;
- exp->flags &= ~NF_CT_EXPECT_INACTIVE;
- exp->timeout.expires = jiffies + expires * HZ;
- add_timer(&exp->timeout);
- found = 1;
- break;
+ if (mod_timer_pending(&exp->timeout, jiffies + expires * HZ)) {
+ exp->flags &= ~NF_CT_EXPECT_INACTIVE;
+ found = 1;
+ break;
+ }
}
spin_unlock_bh(&nf_conntrack_expect_lock);
return found;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 57eeae63f597..ff7304ae58ac 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -240,6 +240,10 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return NULL;
+ if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) {
+ nft_trans_rule_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
+ }
nft_trans_rule(trans) = rule;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -2293,6 +2297,22 @@ err1:
return err;
}
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nlattr *nla)
+{
+ u32 id = ntohl(nla_get_be32(nla));
+ struct nft_trans *trans;
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ struct nft_rule *rule = nft_trans_rule(trans);
+
+ if (trans->msg_type == NFT_MSG_NEWRULE &&
+ id == nft_trans_rule_id(trans))
+ return rule;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
static int nf_tables_delrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2331,6 +2351,12 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
return PTR_ERR(rule);
err = nft_delrule(&ctx, rule);
+ } else if (nla[NFTA_RULE_ID]) {
+ rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
+ if (IS_ERR(rule))
+ return PTR_ERR(rule);
+
+ err = nft_delrule(&ctx, rule);
} else {
err = nft_delrule_by_chain(&ctx);
}
@@ -2398,12 +2424,14 @@ nft_select_set_ops(const struct nlattr * const nla[],
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT |
+ NFT_SET_OBJECT;
}
- bops = NULL;
- best.size = ~0;
- best.class = ~0;
+ bops = NULL;
+ best.size = ~0;
+ best.lookup = ~0;
+ best.space = ~0;
list_for_each_entry(ops, &nf_tables_set_ops, list) {
if ((ops->features & features) != features)
@@ -2413,16 +2441,27 @@ nft_select_set_ops(const struct nlattr * const nla[],
switch (policy) {
case NFT_SET_POL_PERFORMANCE:
- if (est.class < best.class)
- break;
- if (est.class == best.class && est.size < best.size)
+ if (est.lookup < best.lookup)
break;
+ if (est.lookup == best.lookup) {
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ } else if (est.size < best.size) {
+ break;
+ }
+ }
continue;
case NFT_SET_POL_MEMORY:
- if (est.size < best.size)
- break;
- if (est.size == best.size && est.class < best.class)
+ if (!desc->size) {
+ if (est.space < best.space)
+ break;
+ if (est.space == best.space &&
+ est.lookup < best.lookup)
+ break;
+ } else if (est.size < best.size) {
break;
+ }
continue;
default:
break;
@@ -3121,6 +3160,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_bind_check_setelem;
+ iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
@@ -3374,6 +3414,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.iter.count = 0;
args.iter.err = 0;
args.iter.fn = nf_tables_dump_setelem;
+ args.iter.flush = false;
set->ops->walk(&ctx, set, &args.iter);
nla_nest_end(skb, nest);
@@ -3752,7 +3793,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return 0;
err6:
- set->ops->remove(set, &elem);
+ set->ops->remove(ctx->net, set, &elem);
err5:
kfree(trans);
err4:
@@ -3898,7 +3939,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
if (!trans)
return -ENOMEM;
- if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) {
+ if (!set->ops->flush(ctx->net, set, elem->priv)) {
err = -ENOENT;
goto err1;
}
@@ -3936,15 +3977,14 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
- struct nft_set_dump_args args = {
- .iter = {
- .genmask = genmask,
- .fn = nft_flush_set,
- },
+ struct nft_set_iter iter = {
+ .genmask = genmask,
+ .fn = nft_flush_set,
+ .flush = true,
};
- set->ops->walk(&ctx, set, &args.iter);
+ set->ops->walk(&ctx, set, &iter);
- return args.iter.err;
+ return iter.err;
}
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
@@ -4804,7 +4844,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
te->set->ndeact--;
break;
@@ -4925,7 +4965,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->remove(te->set, &te->elem);
+ te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
@@ -4959,6 +4999,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
return 0;
}
+static bool nf_tables_valid_genid(struct net *net, u32 genid)
+{
+ return net->nft.base_seq == genid;
+}
+
static const struct nfnetlink_subsystem nf_tables_subsys = {
.name = "nf_tables",
.subsys_id = NFNL_SUBSYS_NFTABLES,
@@ -4966,6 +5011,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.cb = nf_tables_cb,
.commit = nf_tables_commit,
.abort = nf_tables_abort,
+ .valid_genid = nf_tables_valid_genid,
};
int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -5091,6 +5137,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_loop_check_setelem;
+ iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a09fa9fd8f3d..a2148d0bc50e 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
*
* (C) 2001 by Jay Schulist <jschlst@samba.org>,
* (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2017 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Initial netfilter messages via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -100,9 +100,9 @@ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
}
EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
-static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
+static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u16 type)
{
- u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
+ u8 subsys_id = NFNL_SUBSYS_ID(type);
if (subsys_id >= NFNL_SUBSYS_COUNT)
return NULL;
@@ -111,9 +111,9 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t
}
static inline const struct nfnl_callback *
-nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
+nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss)
{
- u_int8_t cb_id = NFNL_MSG_TYPE(type);
+ u8 cb_id = NFNL_MSG_TYPE(type);
if (cb_id >= ss->cb_count)
return NULL;
@@ -185,7 +185,7 @@ replay:
{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
@@ -273,7 +273,7 @@ enum {
};
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
- u_int16_t subsys_id)
+ u16 subsys_id, u32 genid)
{
struct sk_buff *oskb = skb;
struct net *net = sock_net(skb->sk);
@@ -315,6 +315,12 @@ replay:
return kfree_skb(skb);
}
+ if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) {
+ nfnl_unlock(subsys_id);
+ netlink_ack(oskb, nlh, -ERESTART);
+ return kfree_skb(skb);
+ }
+
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -365,7 +371,7 @@ replay:
{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+ u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
@@ -436,11 +442,51 @@ done:
kfree_skb(skb);
}
+static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
+ [NFNL_BATCH_GENID] = { .type = NLA_U32 },
+};
+
+static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+ struct nlattr *attr = (void *)nlh + min_len;
+ struct nlattr *cda[NFNL_BATCH_MAX + 1];
+ int attrlen = nlh->nlmsg_len - min_len;
+ struct nfgenmsg *nfgenmsg;
+ int msglen, err;
+ u32 gen_id = 0;
+ u16 res_id;
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+ return;
+
+ err = nla_parse(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy);
+ if (err < 0) {
+ netlink_ack(skb, nlh, err);
+ return;
+ }
+ if (cda[NFNL_BATCH_GENID])
+ gen_id = ntohl(nla_get_be32(cda[NFNL_BATCH_GENID]));
+
+ nfgenmsg = nlmsg_data(nlh);
+ skb_pull(skb, msglen);
+ /* Work around old nft using host byte order */
+ if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
+ res_id = NFNL_SUBSYS_NFTABLES;
+ else
+ res_id = ntohs(nfgenmsg->res_id);
+
+ nfnetlink_rcv_batch(skb, nlh, res_id, gen_id);
+}
+
static void nfnetlink_rcv(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
- u_int16_t res_id;
- int msglen;
if (nlh->nlmsg_len < NLMSG_HDRLEN ||
skb->len < nlh->nlmsg_len)
@@ -451,28 +497,10 @@ static void nfnetlink_rcv(struct sk_buff *skb)
return;
}
- if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
- struct nfgenmsg *nfgenmsg;
-
- msglen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (msglen > skb->len)
- msglen = skb->len;
-
- if (nlh->nlmsg_len < NLMSG_HDRLEN ||
- skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
- return;
-
- nfgenmsg = nlmsg_data(nlh);
- skb_pull(skb, msglen);
- /* Work around old nft using host byte order */
- if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
- res_id = NFNL_SUBSYS_NFTABLES;
- else
- res_id = ntohs(nfgenmsg->res_id);
- nfnetlink_rcv_batch(skb, nlh, res_id);
- } else {
+ if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN)
+ nfnetlink_rcv_skb_batch(skb, nlh);
+ else
netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
- }
}
#ifdef CONFIG_MODULES
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 66a2377510e1..c6b8022c0e47 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -32,6 +32,11 @@ struct nft_ct {
};
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
+static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+#endif
+
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
enum nft_ct_keys k,
enum ip_conntrack_dir d)
@@ -151,6 +156,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
case NFT_CT_PROTOCOL:
*dest = nf_ct_protonum(ct);
return;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE: {
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+
+ if (priv->dir < IP_CT_DIR_MAX)
+ *dest = nf_ct_zone_id(zone, priv->dir);
+ else
+ *dest = zone->id;
+
+ return;
+ }
+#endif
default:
break;
}
@@ -179,6 +196,53 @@ err:
regs->verdict.code = NFT_BREAK;
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_set_zone_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ enum ip_conntrack_info ctinfo;
+ u16 value = regs->data[priv->sreg];
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) /* already tracked */
+ return;
+
+ zone.id = value;
+
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ zone.dir = NF_CT_ZONE_DIR_ORIG;
+ break;
+ case IP_CT_DIR_REPLY:
+ zone.dir = NF_CT_ZONE_DIR_REPL;
+ break;
+ default:
+ break;
+ }
+
+ ct = this_cpu_read(nft_ct_pcpu_template);
+
+ if (likely(atomic_read(&ct->ct_general.use) == 1)) {
+ nf_ct_zone_add(ct, &zone);
+ } else {
+ /* previous skb got queued to userspace */
+ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
+ if (!ct) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+ }
+
+ atomic_inc(&ct->ct_general.use);
+ nf_ct_set(skb, ct, IP_CT_NEW);
+}
+#endif
+
static void nft_ct_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -257,6 +321,45 @@ static void nft_ct_netns_put(struct net *net, uint8_t family)
nf_ct_netns_put(net, family);
}
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void nft_ct_tmpl_put_pcpu(void)
+{
+ struct nf_conn *ct;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ct = per_cpu(nft_ct_pcpu_template, cpu);
+ if (!ct)
+ break;
+ nf_ct_put(ct);
+ per_cpu(nft_ct_pcpu_template, cpu) = NULL;
+ }
+}
+
+static bool nft_ct_tmpl_alloc_pcpu(void)
+{
+ struct nf_conntrack_zone zone = { .id = 0 };
+ struct nf_conn *tmp;
+ int cpu;
+
+ if (nft_ct_pcpu_template_refcnt)
+ return true;
+
+ for_each_possible_cpu(cpu) {
+ tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
+ if (!tmp) {
+ nft_ct_tmpl_put_pcpu();
+ return false;
+ }
+
+ atomic_set(&tmp->ct_general.use, 1);
+ per_cpu(nft_ct_pcpu_template, cpu) = tmp;
+ }
+
+ return true;
+}
+#endif
+
static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -266,6 +369,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+ priv->dir = IP_CT_DIR_MAX;
switch (priv->key) {
case NFT_CT_DIRECTION:
if (tb[NFTA_CT_DIRECTION] != NULL)
@@ -333,11 +437,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
case NFT_CT_BYTES:
case NFT_CT_PKTS:
case NFT_CT_AVGPKT:
- /* no direction? return sum of original + reply */
- if (tb[NFTA_CT_DIRECTION] == NULL)
- priv->dir = IP_CT_DIR_MAX;
len = sizeof(u64);
break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ len = sizeof(u16);
+ break;
+#endif
default:
return -EOPNOTSUPP;
}
@@ -371,15 +477,33 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
return 0;
}
+static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
+{
+ switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case NFT_CT_LABELS:
+ nf_connlabels_put(ctx->net);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (--nft_ct_pcpu_template_refcnt == 0)
+ nft_ct_tmpl_put_pcpu();
+#endif
+ default:
+ break;
+ }
+}
+
static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
- bool label_got = false;
unsigned int len;
int err;
+ priv->dir = IP_CT_DIR_MAX;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -397,13 +521,30 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
- label_got = true;
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case NFT_CT_ZONE:
+ if (!nft_ct_tmpl_alloc_pcpu())
+ return -ENOMEM;
+ nft_ct_pcpu_template_refcnt++;
break;
#endif
default:
return -EOPNOTSUPP;
}
+ if (tb[NFTA_CT_DIRECTION]) {
+ priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+ switch (priv->dir) {
+ case IP_CT_DIR_ORIGINAL:
+ case IP_CT_DIR_REPLY:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
@@ -416,8 +557,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
return 0;
err1:
- if (label_got)
- nf_connlabels_put(ctx->net);
+ __nft_ct_set_destroy(ctx, priv);
return err;
}
@@ -432,16 +572,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx,
{
struct nft_ct *priv = nft_expr_priv(expr);
- switch (priv->key) {
-#ifdef CONFIG_NF_CONNTRACK_LABELS
- case NFT_CT_LABELS:
- nf_connlabels_put(ctx->net);
- break;
-#endif
- default:
- break;
- }
-
+ __nft_ct_set_destroy(ctx, priv);
nft_ct_netns_put(ctx->net, ctx->afi->family);
}
@@ -465,6 +596,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
case NFT_CT_BYTES:
case NFT_CT_PKTS:
case NFT_CT_AVGPKT:
+ case NFT_CT_ZONE:
if (priv->dir < IP_CT_DIR_MAX &&
nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
goto nla_put_failure;
@@ -487,6 +619,17 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_ZONE:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
+
return 0;
nla_put_failure:
@@ -512,6 +655,17 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.dump = nft_ct_set_dump,
};
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static const struct nft_expr_ops nft_ct_set_zone_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_set_zone_eval,
+ .init = nft_ct_set_init,
+ .destroy = nft_ct_set_destroy,
+ .dump = nft_ct_set_dump,
+};
+#endif
+
static const struct nft_expr_ops *
nft_ct_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -525,8 +679,13 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DREG])
return &nft_ct_get_ops;
- if (tb[NFTA_CT_SREG])
+ if (tb[NFTA_CT_SREG]) {
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
+ return &nft_ct_set_zone_ops;
+#endif
return &nft_ct_set_ops;
+ }
return ERR_PTR(-EINVAL);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 47beb3abcc9d..c308920b194c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -15,19 +15,29 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-// FIXME:
-#include <net/ipv6.h>
+#include <net/tcp.h>
struct nft_exthdr {
u8 type;
u8 offset;
u8 len;
+ u8 op;
enum nft_registers dreg:8;
+ u8 flags;
};
-static void nft_exthdr_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static unsigned int optlen(const u8 *opt, unsigned int offset)
+{
+ /* Beware zero-length options: make finite progress */
+ if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
+ return 1;
+ else
+ return opt[offset + 1];
+}
+
+static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
u32 *dest = &regs->data[priv->dreg];
@@ -35,8 +45,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
int err;
err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
- if (err < 0)
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ *dest = (err >= 0);
+ return;
+ } else if (err < 0) {
goto err;
+ }
offset += priv->offset;
dest[priv->len / NFT_REG32_SIZE] = 0;
@@ -47,11 +61,59 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ u32 *dest = &regs->data[priv->dreg];
+ struct tcphdr *tcph;
+ u8 *opt;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ goto err;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
+ if (!tcph)
+ goto err;
+
+ tcphdr_len = __tcp_hdrlen(tcph);
+ if (tcphdr_len < sizeof(*tcph))
+ goto err;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ if (!tcph)
+ goto err;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ goto err;
+
+ offset = i + priv->offset;
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ memcpy(dest, opt + offset, priv->len);
+
+ return;
+ }
+
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
[NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
};
static int nft_exthdr_init(const struct nft_ctx *ctx,
@@ -59,13 +121,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- u32 offset, len;
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
int err;
- if (tb[NFTA_EXTHDR_DREG] == NULL ||
- tb[NFTA_EXTHDR_TYPE] == NULL ||
- tb[NFTA_EXTHDR_OFFSET] == NULL ||
- tb[NFTA_EXTHDR_LEN] == NULL)
+ if (!tb[NFTA_EXTHDR_DREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
return -EINVAL;
err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
@@ -76,10 +138,27 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (tb[NFTA_EXTHDR_FLAGS]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
+ if (err < 0)
+ return err;
+
+ if (flags & ~NFT_EXTHDR_F_PRESENT)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_EXTHDR_OP]) {
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+ }
+
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
+ priv->flags = flags;
+ priv->op = op;
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, priv->len);
@@ -97,6 +176,10 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -104,17 +187,45 @@ nla_put_failure:
}
static struct nft_expr_type nft_exthdr_type;
-static const struct nft_expr_ops nft_exthdr_ops = {
+static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
- .eval = nft_exthdr_eval,
+ .eval = nft_exthdr_ipv6_eval,
.init = nft_exthdr_init,
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+};
+
+static const struct nft_expr_ops *
+nft_exthdr_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ u32 op;
+
+ if (!tb[NFTA_EXTHDR_OP])
+ return &nft_exthdr_ipv6_ops;
+
+ op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ switch (op) {
+ case NFT_EXTHDR_OP_TCPOPT:
+ return &nft_exthdr_tcp_ops;
+ case NFT_EXTHDR_OP_IPV6:
+ return &nft_exthdr_ipv6_ops;
+ }
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_exthdr_type __read_mostly = {
.name = "exthdr",
- .ops = &nft_exthdr_ops,
+ .select_ops = &nft_exthdr_select_ops,
.policy = nft_exthdr_policy,
.maxattr = NFTA_EXTHDR_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
new file mode 100644
index 000000000000..97f9649bcc7e
--- /dev/null
+++ b/net/netfilter/nft_set_bitmap.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2017 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* This bitmap uses two bits to represent one element. These two bits determine
+ * the element state in the current and the future generation.
+ *
+ * An element can be in three states. The generation cursor is represented using
+ * the ^ character, note that this cursor shifts on every succesful transaction.
+ * If no transaction is going on, we observe all elements are in the following
+ * state:
+ *
+ * 11 = this element is active in the current generation. In case of no updates,
+ * ^ it stays active in the next generation.
+ * 00 = this element is inactive in the current generation. In case of no
+ * ^ updates, it stays inactive in the next generation.
+ *
+ * On transaction handling, we observe these two temporary states:
+ *
+ * 01 = this element is inactive in the current generation and it becomes active
+ * ^ in the next one. This happens when the element is inserted but commit
+ * path has not yet been executed yet, so activation is still pending. On
+ * transaction abortion, the element is removed.
+ * 10 = this element is active in the current generation and it becomes inactive
+ * ^ in the next one. This happens when the element is deactivated but commit
+ * path has not yet been executed yet, so removal is still pending. On
+ * transation abortion, the next generation bit is reset to go back to
+ * restore its previous state.
+ */
+struct nft_bitmap {
+ u16 bitmap_size;
+ u8 bitmap[];
+};
+
+static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+{
+ u32 k = (key << 1);
+
+ *idx = k / BITS_PER_BYTE;
+ *off = k % BITS_PER_BYTE;
+}
+
+/* Fetch the two bits that represent the element and check if it is active based
+ * on the generation mask.
+ */
+static inline bool
+nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
+{
+ return (bitmap[idx] & (0x3 << off)) & (genmask << off);
+}
+
+static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ u32 idx, off;
+
+ nft_bitmap_location(*key, &idx, &off);
+
+ return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+}
+
+static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ struct nft_set_ext **_ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return -EEXIST;
+
+ /* Enter 01 state. */
+ priv->bitmap[idx] |= (genmask << off);
+
+ return 0;
+}
+
+static void nft_bitmap_remove(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 00 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+}
+
+static void nft_bitmap_activate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext *ext = elem->priv;
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 11 state. */
+ priv->bitmap[idx] |= (genmask << off);
+}
+
+static bool nft_bitmap_flush(const struct net *net,
+ const struct nft_set *set, void *ext)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ u32 idx, off;
+
+ nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ /* Enter 10 state, similar to deactivation. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return true;
+}
+
+static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext)
+ return NULL;
+
+ nft_set_ext_init(ext, &tmpl);
+ memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
+
+ return ext;
+}
+
+static void *nft_bitmap_deactivate(const struct net *net,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
+ struct nft_set_ext *ext;
+ u32 idx, off, key = 0;
+
+ memcpy(&key, elem->key.val.data, set->klen);
+ nft_bitmap_location(key, &idx, &off);
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return NULL;
+
+ /* We have no real set extension since this is a bitmap, allocate this
+ * dummy object that is released from the commit/abort path.
+ */
+ ext = nft_bitmap_ext_alloc(set, elem);
+ if (!ext)
+ return NULL;
+
+ /* Enter 10 state. */
+ priv->bitmap[idx] &= ~(genmask << off);
+
+ return ext;
+}
+
+static void nft_bitmap_walk(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ struct nft_set_iter *iter)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+ int idx, off;
+ u16 key;
+
+ nft_set_ext_prepare(&tmpl);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+
+ for (idx = 0; idx < priv->bitmap_size; idx++) {
+ for (off = 0; off < BITS_PER_BYTE; off += 2) {
+ if (iter->count < iter->skip)
+ goto cont;
+
+ if (!nft_bitmap_active(priv->bitmap, idx, off,
+ iter->genmask))
+ goto cont;
+
+ ext = kzalloc(tmpl.len, GFP_KERNEL);
+ if (!ext) {
+ iter->err = -ENOMEM;
+ return;
+ }
+ nft_set_ext_init(ext, &tmpl);
+ key = ((idx * BITS_PER_BYTE) + off) >> 1;
+ memcpy(nft_set_ext_key(ext), &key, set->klen);
+
+ elem.priv = ext;
+ iter->err = iter->fn(ctx, set, iter, &elem);
+
+ /* On set flush, this dummy extension object is released
+ * from the commit/abort path.
+ */
+ if (!iter->flush)
+ kfree(ext);
+
+ if (iter->err < 0)
+ return;
+cont:
+ iter->count++;
+ }
+ }
+}
+
+/* The bitmap size is pow(2, key length in bits) / bits per byte. This is
+ * multiplied by two since each element takes two bits. For 8 bit keys, the
+ * bitmap consumes 66 bytes. For 16 bit keys, 16388 bytes.
+ */
+static inline u32 nft_bitmap_size(u32 klen)
+{
+ return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
+}
+
+static inline u32 nft_bitmap_total_size(u32 klen)
+{
+ return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
+}
+
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+{
+ u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+
+ return nft_bitmap_total_size(klen);
+}
+
+static int nft_bitmap_init(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ const struct nlattr * const nla[])
+{
+ struct nft_bitmap *priv = nft_set_priv(set);
+
+ priv->bitmap_size = nft_bitmap_total_size(set->klen);
+
+ return 0;
+}
+
+static void nft_bitmap_destroy(const struct nft_set *set)
+{
+}
+
+static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
+ struct nft_set_estimate *est)
+{
+ /* Make sure bitmaps we don't get bitmaps larger than 16 Kbytes. */
+ if (desc->klen > 2)
+ return false;
+
+ est->size = nft_bitmap_total_size(desc->klen);
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_1;
+
+ return true;
+}
+
+static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+ .privsize = nft_bitmap_privsize,
+ .estimate = nft_bitmap_estimate,
+ .init = nft_bitmap_init,
+ .destroy = nft_bitmap_destroy,
+ .insert = nft_bitmap_insert,
+ .remove = nft_bitmap_remove,
+ .deactivate = nft_bitmap_deactivate,
+ .flush = nft_bitmap_flush,
+ .activate = nft_bitmap_activate,
+ .lookup = nft_bitmap_lookup,
+ .walk = nft_bitmap_walk,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_bitmap_module_init(void)
+{
+ return nft_register_set(&nft_bitmap_ops);
+}
+
+static void __exit nft_bitmap_module_exit(void)
+{
+ nft_unregister_set(&nft_bitmap_ops);
+}
+
+module_init(nft_bitmap_module_init);
+module_exit(nft_bitmap_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index e36069fb76ae..5f652720fc78 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -167,8 +167,8 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
nft_set_elem_clear_busy(&he->ext);
}
-static bool nft_hash_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_hash_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_hash_elem *he = priv;
@@ -195,7 +195,7 @@ static void *nft_hash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
if (he != NULL &&
- !nft_hash_deactivate_one(net, set, he))
+ !nft_hash_flush(net, set, he))
he = NULL;
rcu_read_unlock();
@@ -203,7 +203,8 @@ static void *nft_hash_deactivate(const struct net *net,
return he;
}
-static void nft_hash_remove(const struct nft_set *set,
+static void nft_hash_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
@@ -383,7 +384,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
est->size = esize + 2 * sizeof(struct nft_hash_elem *);
}
- est->class = NFT_SET_CLASS_O_1;
+ est->lookup = NFT_SET_CLASS_O_1;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
@@ -397,12 +399,12 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
- .deactivate_one = nft_hash_deactivate_one,
+ .flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
+ .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index f06f55ee516d..71e8fb886a73 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -151,7 +151,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
return err;
}
-static void nft_rbtree_remove(const struct nft_set *set,
+static void nft_rbtree_remove(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
@@ -171,8 +172,8 @@ static void nft_rbtree_activate(const struct net *net,
nft_set_elem_change_active(net, set, &rbe->ext);
}
-static bool nft_rbtree_deactivate_one(const struct net *net,
- const struct nft_set *set, void *priv)
+static bool nft_rbtree_flush(const struct net *net,
+ const struct nft_set *set, void *priv)
{
struct nft_rbtree_elem *rbe = priv;
@@ -213,7 +214,7 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
continue;
}
- nft_rbtree_deactivate_one(net, set, rbe);
+ nft_rbtree_flush(net, set, rbe);
return rbe;
}
}
@@ -290,7 +291,8 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
else
est->size = nsize;
- est->class = NFT_SET_CLASS_O_LOG_N;
+ est->lookup = NFT_SET_CLASS_O_LOG_N;
+ est->space = NFT_SET_CLASS_O_N;
return true;
}
@@ -304,11 +306,11 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
- .deactivate_one = nft_rbtree_deactivate_one,
+ .flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
- .features = NFT_SET_INTERVAL | NFT_SET_MAP,
+ .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
.owner = THIS_MODULE,
};