From 9b42c1f179a614e11893ae4619f0304a38f481ae Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 12:44:26 +0200
Subject: xfrm: Extend the output_mark to support input direction and masking.

We already support setting an output mark at the xfrm_state,
unfortunately this does not support the input direction and
masking the marks that will be applied to the skb. This change
adds support applying a masked value in both directions.

The existing XFRMA_OUTPUT_MARK number is reused for this purpose
and as it is now bi-directional, it is renamed to XFRMA_SET_MARK.

An additional XFRMA_SET_MARK_MASK attribute is added for setting the
mask. If the attribute mask not provided, it is set to 0xffffffff,
keeping the XFRMA_OUTPUT_MARK existing 'full mask' semantics.

Co-developed-by: Tobias Brunner <tobias@strongswan.org>
Co-developed-by: Eyal Birger <eyal.birger@gmail.com>
Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Tobias Brunner <tobias@strongswan.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/xfrm.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..3dc83ba26f62 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -166,7 +166,7 @@ struct xfrm_state {
 		int		header_len;
 		int		trailer_len;
 		u32		extra_flags;
-		u32		output_mark;
+		struct xfrm_mark	smark;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
@@ -2012,6 +2012,13 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
 	return ret;
 }
 
+static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
+{
+	struct xfrm_mark *m = &x->props.smark;
+
+	return (m->v & m->m) | (mark & ~m->m);
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
-- 
cgit 


From d159ce7957eec306eacda672e5909e26675ca8ef Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:06:57 +0200
Subject: flow: Extend flow informations with xfrm interface id.

Add a new flowi_xfrm structure with informations needed to do
a xfrm lookup. At the moment it keeps the informations about
the new xfrm interface id needed to lookup xfrm interfaces
that are introduced with a followup patch. We need this new
lookup key as other possible keys, like the ifindex is
already part of the xfrm selector and used as a key to
enforce the output device after the transformation in the
policy/state lookup.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/flow.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 8ce21793094e..187c9bef672f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,6 +26,10 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
+struct flowi_xfrm {
+	__u32			if_id;
+};
+
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -39,6 +43,7 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
+	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -78,6 +83,7 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
+#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -109,6 +115,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
+	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -138,6 +145,7 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
+#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -185,6 +193,7 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
+#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
-- 
cgit 


From 7e6526404adedf079279aa7aa11722deaca8fe2e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:07 +0200
Subject: xfrm: Add a new lookup key to match xfrm interfaces.

This patch adds the xfrm interface id as a lookup key
for xfrm states and policies. With this we can assign
states and policies to virtual xfrm interfaces.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3dc83ba26f62..e8bada4d2a45 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -147,6 +147,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			if_id;
 	u32			tfcpad;
 
 	u32			genid;
@@ -574,6 +575,7 @@ struct xfrm_policy {
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
+	u32			if_id;
 	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
@@ -1533,7 +1535,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
 				   unsigned short family);
-struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
+struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
 				       unsigned short family,
@@ -1690,20 +1692,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 		     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 					  u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
-				     u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
+				     int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
@@ -2019,6 +2021,15 @@ static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
 	return (m->v & m->m) | (mark & ~m->m);
 }
 
+static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
+{
+	int ret = 0;
+
+	if (if_id)
+		ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
+	return ret;
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
-- 
cgit 


From f203b76d78092faf248db3f851840fbecf80b40e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:12 +0200
Subject: xfrm: Add virtual xfrm interfaces

This patch adds support for virtual xfrm interfaces.
Packets that are routed through such an interface
are guaranteed to be IPsec transformed or dropped.
It is a generic virtual interface that ensures IPsec
transformation, no need to know what happens behind
the interface. This means that we can tunnel IPv4 and
IPv6 through the same interface and support all xfrm
modes (tunnel, transport and beet) on it.

Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e8bada4d2a45..3fa578a6a819 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -23,6 +23,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -293,6 +294,13 @@ struct xfrm_replay {
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+	struct xfrm_if	*(*decode_session)(struct sk_buff *skb);
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -1039,6 +1047,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+	char name[IFNAMSIZ];	/* name of XFRM device */
+	int link;		/* ifindex of underlying L2 interface */
+	u32 if_id;		/* interface identifyer */
+};
+
+struct xfrm_if {
+	struct xfrm_if __rcu *next;	/* next interface in list */
+	struct net_device *dev;		/* virtual device associated with interface */
+	struct net_device *phydev;	/* physical device */
+	struct net *net;		/* netns for packet i/o */
+	struct xfrm_if_parms p;		/* interface parms */
+
+	struct gro_cells gro_cells;
+};
+
 struct xfrm_offload {
 	/* Output sequence number for replay protection on offloading. */
 	struct {
-- 
cgit 


From e4db5b61c572475bbbcf63e3c8a2606bfccf2c9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:26:02 +0200
Subject: xfrm: policy: remove pcpu policy cache

Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3fa578a6a819..a5378613a49c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -332,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
-- 
cgit 


From a9744f7ca200c756e6f8c65b633770a2da711651 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 29 Jun 2018 09:48:20 +0200
Subject: xsk: fix potential race in SKB TX completion code

There is a potential race in the TX completion code for the SKB
case. One process enters the sendmsg code of an AF_XDP socket in order
to send a frame. The execution eventually trickles down to the driver
that is told to send the packet. However, it decides to drop the
packet due to some error condition (e.g., rings full) and frees the
SKB. This will trigger the SKB destructor and a completion will be
sent to the AF_XDP user space through its
single-producer/single-consumer queues.

At the same time a TX interrupt has fired on another core and it
dispatches the TX completion code in the driver. It does its HW
specific things and ends up freeing the SKB associated with the
transmitted packet. This will trigger the SKB destructor and a
completion will be sent to the AF_XDP user space through its
single-producer/single-consumer queues. With a pseudo call stack, it
would look like this:

Core 1:
sendmsg() being called in the application
  netdev_start_xmit()
    Driver entered through ndo_start_xmit
      Driver decides to free the SKB for some reason (e.g., rings full)
        Destructor of SKB called
          xskq_produce_addr() is called to signal completion to user space

Core 2:
TX completion irq
  NAPI loop
    Driver irq handler for TX completions
      Frees the SKB
        Destructor of SKB called
          xskq_produce_addr() is called to signal completion to user space

We now have a violation of the single-producer/single-consumer
principle for our queues as there are two threads trying to produce at
the same time on the same queue.

Fixed by introducing a spin_lock in the destructor. In regards to the
performance, I get around 1.74 Mpps for txonly before and after the
introduction of the spinlock. There is of course some impact due to
the spin lock but it is in the less significant digits that are too
noisy for me to measure. But let us say that the version without the
spin lock got 1.745 Mpps in the best case and the version with 1.735
Mpps in the worst case, then that would mean a maximum drop in
performance of 0.5%.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp_sock.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9fe472f2ac95..7161856bcf9c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -60,6 +60,10 @@ struct xdp_sock {
 	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
+	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
+	 * in the SKB destructor callback.
+	 */
+	spinlock_t tx_completion_lock;
 	u64 rx_dropped;
 };
 
-- 
cgit 


From 33bd5ac54dc47e002da4a395aaf9bf158dd17709 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 3 Jul 2018 14:36:21 -0700
Subject: net/ipv6: Revert attempt to simplify route replace and append

NetworkManager likes to manage linklocal prefix routes and does so with
the NLM_F_APPEND flag, breaking attempts to simplify the IPv6 route
code and by extension enable multipath routes with device only nexthops.

Revert f34436a43092 and these followup patches:
6eba08c3626b ("ipv6: Only emit append events for appended routes").
ce45bded6435 ("mlxsw: spectrum_router: Align with new route replace logic")
53b562df8c20 ("mlxsw: spectrum_router: Allow appending to dev-only routes")

Update the fib_tests cases to reflect the old behavior.

Fixes: f34436a43092 ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_route.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 59656fc580df..7b9c82de11cc 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
+{
+	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	       RTF_GATEWAY;
+}
+
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
-- 
cgit 


From a9ba23d48dbc6ffd08426bb10f05720e0b9f5c14 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 4 Jul 2018 09:58:05 -0400
Subject: ipv6: make ipv6_renew_options() interrupt/kernel safe

At present the ipv6_renew_options_kern() function ends up calling into
access_ok() which is problematic if done from inside an interrupt as
access_ok() calls WARN_ON_IN_IRQ() on some (all?) architectures
(x86-64 is affected).  Example warning/backtrace is shown below:

 WARNING: CPU: 1 PID: 3144 at lib/usercopy.c:11 _copy_from_user+0x85/0x90
 ...
 Call Trace:
  <IRQ>
  ipv6_renew_option+0xb2/0xf0
  ipv6_renew_options+0x26a/0x340
  ipv6_renew_options_kern+0x2c/0x40
  calipso_req_setattr+0x72/0xe0
  netlbl_req_setattr+0x126/0x1b0
  selinux_netlbl_inet_conn_request+0x80/0x100
  selinux_inet_conn_request+0x6d/0xb0
  security_inet_conn_request+0x32/0x50
  tcp_conn_request+0x35f/0xe00
  ? __lock_acquire+0x250/0x16c0
  ? selinux_socket_sock_rcv_skb+0x1ae/0x210
  ? tcp_rcv_state_process+0x289/0x106b
  tcp_rcv_state_process+0x289/0x106b
  ? tcp_v6_do_rcv+0x1a7/0x3c0
  tcp_v6_do_rcv+0x1a7/0x3c0
  tcp_v6_rcv+0xc82/0xcf0
  ip6_input_finish+0x10d/0x690
  ip6_input+0x45/0x1e0
  ? ip6_rcv_finish+0x1d0/0x1d0
  ipv6_rcv+0x32b/0x880
  ? ip6_make_skb+0x1e0/0x1e0
  __netif_receive_skb_core+0x6f2/0xdf0
  ? process_backlog+0x85/0x250
  ? process_backlog+0x85/0x250
  ? process_backlog+0xec/0x250
  process_backlog+0xec/0x250
  net_rx_action+0x153/0x480
  __do_softirq+0xd9/0x4f7
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  ...

While not present in the backtrace, ipv6_renew_option() ends up calling
access_ok() via the following chain:

  access_ok()
  _copy_from_user()
  copy_from_user()
  ipv6_renew_option()

The fix presented in this patch is to perform the userspace copy
earlier in the call chain such that it is only called when the option
data is actually coming from userspace; that place is
do_ipv6_setsockopt().  Not only does this solve the problem seen in
the backtrace above, it also allows us to simplify the code quite a
bit by removing ipv6_renew_options_kern() completely.  We also take
this opportunity to cleanup ipv6_renew_options()/ipv6_renew_option()
a small amount as well.

This patch is heavily based on a rough patch by Al Viro.  I've taken
his original patch, converted a kmemdup() call in do_ipv6_setsockopt()
to a memdup_user() call, made better use of the e_inval jump target in
the same function, and cleaned up the use ipv6_renew_option() by
ipv6_renew_options().

CC: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 16475c269749..d02881e4ad1f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -355,14 +355,7 @@ struct ipv6_txoptions *ipv6_dup_options(struct sock *sk,
 struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
 					  struct ipv6_txoptions *opt,
 					  int newtype,
-					  struct ipv6_opt_hdr __user *newopt,
-					  int newoptlen);
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk,
-			struct ipv6_txoptions *opt,
-			int newtype,
-			struct ipv6_opt_hdr *newopt,
-			int newoptlen);
+					  struct ipv6_opt_hdr *newopt);
 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 					  struct ipv6_txoptions *opt);
 
-- 
cgit 


From cfdb0c2d095ac5d7f09cac1317b7d0a9e8178134 Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:12:50 +0530
Subject: Bluetooth: Store Resolv list size

When the controller supports the Read LE Resolv List size feature, the
maximum list size are read and now stored.

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 17.979791
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 17.980629
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 17.980786
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 17.981627
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #59 [hci0] 17.981786
> HCI Event: Command Complete (0x0e) plen 12               #60 [hci0] 17.982636
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 6 ++++++
 include/net/bluetooth/hci_core.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 1668211297a9..484f24c7a415 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,12 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
+struct hci_rp_le_read_resolv_list_size {
+	__u8	status;
+	__u8	size;
+} __packed;
+
 #define HCI_OP_LE_READ_MAX_DATA_LEN	0x202f
 struct hci_rp_le_read_max_data_len {
 	__u8	status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 893bbbb5d2fa..409f49bd8338 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -221,6 +221,7 @@ struct hci_dev {
 	__u8		features[HCI_MAX_PAGES][8];
 	__u8		le_features[8];
 	__u8		le_white_list_size;
+	__u8		le_resolv_list_size;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -367,6 +368,7 @@ struct hci_dev {
 	struct list_head	identity_resolving_keys;
 	struct list_head	remote_oob_data;
 	struct list_head	le_white_list;
+	struct list_head	le_resolv_list;
 	struct list_head	le_conn_params;
 	struct list_head	pend_le_conns;
 	struct list_head	pend_le_reports;
-- 
cgit 


From 545f2596b907f0747170c7cb71edc74cecf68c5c Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:13:20 +0530
Subject: Bluetooth: Add HCI command for clear Resolv list

Check for Resolv list supported by controller. So check the supported
commmand first before issuing this command i.e.,HCI_OP_LE_CLEAR_RESOLV_LIST

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 28.919131
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 28.920016
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 28.920164
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 28.920873
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 28.921109
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 28.922016
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear Resolving... (0x08|0x0029) plen 0  #61 [hci0] 28.922166
> HCI Event: Command Complete (0x0e) plen 4                #62 [hci0] 28.922872
      LE Clear Resolving List (0x08|0x0029) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #63 [hci0] 28.923117
> HCI Event: Command Complete (0x0e) plen 12               #64 [hci0] 28.924030
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 484f24c7a415..4af1a3a4d9b1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,8 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_CLEAR_RESOLV_LIST	0x2029
+
 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
 struct hci_rp_le_read_resolv_list_size {
 	__u8	status;
-- 
cgit 


From 5711b4e89319c2912f20b2a4f371c1525fc9551d Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Thu, 5 Jul 2018 12:01:53 +0200
Subject: netfilter: nf_tproxy: fix possible non-linear access to transport
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a silent out-of-bound read possibility that was present
because of the misuse of this function.

Mostly it was called with a struct udphdr *hp which had only the udphdr
part linearized by the skb_header_pointer, however
nf_tproxy_get_sock_v{4,6} uses it as a tcphdr pointer, so some reads for
tcp specific attributes may be invalid.

Fixes: a583636a83ea ("inet: refactor inet[6]_lookup functions to take skb")
Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 9754a50ecde9..4cc64c8446eb 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -64,7 +64,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
  * belonging to established connections going through that one.
  */
 struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 		      const u8 protocol,
 		      const __be32 saddr, const __be32 daddr,
 		      const __be16 sport, const __be16 dport,
@@ -103,7 +103,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 			    struct sock *sk);
 
 struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 		      const u8 protocol,
 		      const struct in6_addr *saddr, const struct in6_addr *daddr,
 		      const __be16 sport, const __be16 dport,
-- 
cgit 


From a948f713842ad5c23f125efc61dee6951893219c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 3 Jul 2018 15:05:48 -0500
Subject: nl80211/mac80211: allow non-linear skb in rx_control_port

The current implementation of cfg80211_rx_control_port assumed that the
caller could provide a contiguous region of memory for the control port
frame to be sent up to userspace.  Unfortunately, many drivers produce
non-linear skbs, especially for data frames.  This resulted in userspace
getting notified of control port frames with correct metadata (from
address, port, etc) yet garbage / nonsense contents, resulting in bad
handshakes, disconnections, etc.

mac80211 linearizes skbs containing management frames.  But it didn't
seem worthwhile to do this for control port frames.  Thus the signature
of cfg80211_rx_control_port was changed to take the skb directly.
nl80211 then takes care of obtaining control port frame data directly
from the (linear | non-linear) skb.

The caller is still responsible for freeing the skb,
cfg80211_rx_control_port does not take ownership of it.

Fixes: 6a671a50f819 ("nl80211: Add CMD_CONTROL_PORT_FRAME API")
Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[fix some kernel-doc formatting, add fixes tag]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5fbfe61f41c6..1beb3ead0385 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5835,10 +5835,11 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 /**
  * cfg80211_rx_control_port - notification about a received control port frame
  * @dev: The device the frame matched to
- * @buf: control port frame
- * @len: length of the frame data
- * @addr: The peer from which the frame was received
- * @proto: frame protocol, typically PAE or Pre-authentication
+ * @skb: The skbuf with the control port frame.  It is assumed that the skbuf
+ *	is 802.3 formatted (with 802.3 header).  The skb can be non-linear.
+ *	This function does not take ownership of the skb, so the caller is
+ *	responsible for any cleanup.  The caller must also ensure that
+ *	skb->protocol is set appropriately.
  * @unencrypted: Whether the frame was received unencrypted
  *
  * This function is used to inform userspace about a received control port
@@ -5851,8 +5852,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
  * Return: %true if the frame was passed to userspace
  */
 bool cfg80211_rx_control_port(struct net_device *dev,
-			      const u8 *buf, size_t len,
-			      const u8 *addr, u16 proto, bool unencrypted);
+			      struct sk_buff *skb, bool unencrypted);
 
 /**
  * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
-- 
cgit 


From e240cd0df48185a28c153f83a39ba3940e3e9b86 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 6 Jul 2018 19:06:43 +0200
Subject: netfilter: nf_tables: place all set backends in one single module

This patch disallows rbtree with single elements, which is causing
problems with the recent timeout support. Before this patch, you
could opt out individual set representations per module, which is
just adding extra complexity.

Fixes: 8d8540c4f5e0("netfilter: nft_set_rbtree: add timeout support")
Reported-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index e0c0c2558ec4..a05134507e7b 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -65,4 +65,10 @@ extern const struct nft_expr_ops nft_payload_fast_ops;
 extern struct static_key_false nft_counters_enabled;
 extern struct static_key_false nft_trace_enabled;
 
+extern struct nft_set_type nft_set_rhash_type;
+extern struct nft_set_type nft_set_hash_type;
+extern struct nft_set_type nft_set_hash_fast_type;
+extern struct nft_set_type nft_set_rbtree_type;
+extern struct nft_set_type nft_set_bitmap_type;
+
 #endif /* _NET_NF_TABLES_CORE_H */
-- 
cgit 


From a2344b9e3a8c5c2064306b0d99b0e9a6c4813c08 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:28 +0530
Subject: Bluetooth: Use extended scanning if controller supports

This implements Set extended scan param and set extended scan enable
commands and use it for start LE scan based on controller support.

The new features added in these commands are setting of new PHY for
scanning and setting of scan duration. Both features are disabled
for now, meaning only 1M PHY is set and scan duration is set to 0
which means that scanning will be done untill scan disable is called.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 8
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x01
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 24 ++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 2 files changed, 28 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4af1a3a4d9b1..8c2868f439e7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,30 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
+struct hci_cp_le_set_ext_scan_params {
+	__u8    own_addr_type;
+	__u8    filter_policy;
+	__u8    scanning_phys;
+	__u8    data[0];
+} __packed;
+
+#define LE_SCAN_PHY_1M 0x01
+
+struct hci_cp_le_scan_phy_params {
+	__u8    type;
+	__le16  interval;
+	__le16  window;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_ENABLE   0x2042
+struct hci_cp_le_set_ext_scan_enable {
+	__u8    enable;
+	__u8    filter_dup;
+	__le16  duration;
+	__le16  period;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 409f49bd8338..cc0bde74dd45 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1158,6 +1158,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+/* Use ext scanning if set ext scan param and ext scan enable is supported */
+#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
+			   ((dev)->commands[37] & 0x40))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
-- 
cgit 


From c215e9397b00b3045a668120ed7dbd89f2866e74 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:29 +0530
Subject: Bluetooth: Process extended ADV report event

This patch enables Extended ADV report event if extended scanning
is supported in the controller and process the same.

The new features are not handled and for now its as good as
legacy ADV report.

> HCI Event: LE Meta Event (0x3e) plen 53
      LE Extended Advertising Report (0x0d)
        Num reports: 1
        Entry 0
          Event type: 0x0013
            Props: 0x0013
              Connectable
              Scannable
              Use legacy advertising PDUs
            Data status: Complete
          Legacy PDU Type: ADV_IND (0x0013)
          Address type: Random (0x01)
          Address: DB:7E:2E:1A:85:E8 (Static)
          Primary PHY: LE 1M
          Secondary PHY: LE 1M
          SID: 0x00
          TX power: 0 dBm
          RSSI: -90 dBm (0xa6)
          Periodic advertising invteral: 0.00 msec (0x0000)
          Direct address type: Public (0x00)
          Direct address: 00:00:00:00:00:00 (OUI 00-00-00)
          Data length: 0x1b
        0f 09 44 65 73 69 67 6e 65 72 20 4d 6f 75 73 65  ..Designer Mouse
        03 19 c2 03 02 01 05 03 03 12 18                 ...........

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8c2868f439e7..0ec51eb14810 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1925,6 +1925,15 @@ struct hci_ev_le_conn_complete {
 #define LE_ADV_SCAN_IND		0x02
 #define LE_ADV_NONCONN_IND	0x03
 #define LE_ADV_SCAN_RSP		0x04
+#define LE_ADV_INVALID		0x05
+
+/* Legacy event types in extended adv report */
+#define LE_LEGACY_ADV_IND		0x0013
+#define LE_LEGACY_ADV_DIRECT_IND 	0x0015
+#define LE_LEGACY_ADV_SCAN_IND		0x0012
+#define LE_LEGACY_NONCONN_IND		0x0010
+#define LE_LEGACY_SCAN_RSP_ADV		0x001b
+#define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
@@ -1989,6 +1998,23 @@ struct hci_ev_le_direct_adv_info {
 	__s8	 rssi;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_REPORT    0x0d
+struct hci_ev_le_ext_adv_report {
+	__le16 	 evt_type;
+	__u8	 bdaddr_type;
+	bdaddr_t bdaddr;
+	__u8	 primary_phy;
+	__u8	 secondary_phy;
+	__u8	 sid;
+	__u8	 tx_power;
+	__s8	 rssi;
+	__le16 	 interval;
+	__u8  	 direct_addr_type;
+	bdaddr_t direct_addr;
+	__u8  	 length;
+	__u8	 data[0];
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit 


From 4d94f95d30c8fbfe86068e9abed110974d697cf5 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 22:50:32 +0200
Subject: Bluetooth: Use extended LE Connection if supported

This implements extended LE craete connection and enhanced
LE conn complete event if the controller supports.

For now it is as good as legacy LE connection and event as
no new features in the extended connection is handled.

< HCI Command: LE Extended Create Connection (0x08|0x0043) plen 26
        Filter policy: White list is not used (0x00)
        Own address type: Public (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Initiating PHYs: 0x01
        Entry 0: LE 1M
          Scan interval: 60.000 msec (0x0060)
          Scan window: 60.000 msec (0x0060)
          Min connection interval: 50.00 msec (0x0028)
          Max connection interval: 70.00 msec (0x0038)
          Connection latency: 0 (0x0000)
          Supervision timeout: 420 msec (0x002a)
          Min connection length: 0.000 msec (0x0000)
          Max connection length: 0.000 msec (0x0000)
> HCI Event: Command Status (0x0f) plen 4
      LE Extended Create Connection (0x08|0x0043) ncmd 2
        Status: Success (0x00)
> HCI Event: LE Meta Event (0x3e) plen 31
      LE Enhanced Connection Complete (0x0a)
        Status: Success (0x00)
        Handle: 3585
        Role: Master (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Local resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Peer resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Connection interval: 67.50 msec (0x0036)
        Connection latency: 0 (0x0000)
        Supervision timeout: 420 msec (0x002a)
        Master clock accuracy: 0x00
@ MGMT Event: Device Connected (0x000b) plen 40
        LE Address: DB:7E:2E:1D:85:E8 (Static)
        Flags: 0x00000000
        Data length: 27
        Name (complete): Designer Mouse
        Appearance: Mouse (0x03c2)
        Flags: 0x05
          LE Limited Discoverable Mode
          BR/EDR Not Supported
        16-bit Service UUIDs (complete): 1 entry
          Human Interface Device (0x1812)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 36 ++++++++++++++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  2 ++
 2 files changed, 38 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0ec51eb14810..73e48be5bbb3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1538,6 +1538,27 @@ struct hci_cp_le_set_ext_scan_enable {
 	__le16  period;
 } __packed;
 
+#define HCI_OP_LE_EXT_CREATE_CONN    0x2043
+struct hci_cp_le_ext_create_conn {
+	__u8      filter_policy;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      phys;
+	__u8      data[0];
+} __packed;
+
+struct hci_cp_le_ext_conn_param {
+	__le16 scan_interval;
+	__le16 scan_window;
+	__le16 conn_interval_min;
+	__le16 conn_interval_max;
+	__le16 conn_latency;
+	__le16 supervision_timeout;
+	__le16 min_ce_len;
+	__le16 max_ce_len;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
@@ -2015,6 +2036,21 @@ struct hci_ev_le_ext_adv_report {
 	__u8	 data[0];
 } __packed;
 
+#define HCI_EV_LE_ENHANCED_CONN_COMPLETE    0x0a
+struct hci_ev_le_enh_conn_complete {
+	__u8      status;
+	__le16    handle;
+	__u8      role;
+	__u8      bdaddr_type;
+	bdaddr_t  bdaddr;
+	bdaddr_t  local_rpa;
+	bdaddr_t  peer_rpa;
+	__le16    interval;
+	__le16    latency;
+	__le16    supervision_timeout;
+	__u8      clk_accurancy;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cc0bde74dd45..a74453571264 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1161,6 +1161,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
+/* Use ext create connection if command is supported */
+#define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
-- 
cgit 


From 11a245e2f7bf25fc21f47e4c9c8491841b128890 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 6 Jul 2018 21:01:05 +0200
Subject: net/sched: act_csum: fix NULL dereference when 'goto chain' is used

the control action in the common member of struct tcf_csum must be a valid
value, as it can contain the chain index when 'goto chain' is used. Ensure
that the control action can be read as x->tcfa_action, when x is a pointer
to struct tc_action and x->ops->type is TCA_ACT_CSUM, to prevent the
following command:

  # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
  > $tcflags dst_mac $h2mac action csum ip or tcp or udp or sctp goto chain 1

from triggering a NULL pointer dereference when a matching packet is
received.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 800000010416b067 P4D 800000010416b067 PUD 1041be067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 0 PID: 3072 Comm: mausezahn Tainted: G            E     4.18.0-rc2.auguri+ #421
 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffffa020dea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffffa020d7ccef00 RCX: 0000000000000054
 RDX: 0000000000000000 RSI: ffffa020ca5ae000 RDI: ffffa020d7ccef00
 RBP: ffffa020dea03e60 R08: 0000000000000000 R09: ffffa020dea03c9c
 R10: ffffa020dea03c78 R11: 0000000000000008 R12: ffffa020d3fe4f00
 R13: ffffa020d3fe4f08 R14: 0000000000000001 R15: ffffa020d53ca300
 FS:  00007f5a46942740(0000) GS:ffffa020dea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 0000000104218002 CR4: 00000000001606f0
 Call Trace:
  <IRQ>
  fl_classify+0x1ad/0x1c0 [cls_flower]
  ? arp_rcv+0x121/0x1b0
  ? __x2apic_send_IPI_dest+0x40/0x40
  ? smp_reschedule_interrupt+0x1c/0xd0
  ? reschedule_interrupt+0xf/0x20
  ? reschedule_interrupt+0xa/0x20
  ? device_is_rmrr_locked+0xe/0x50
  ? iommu_should_identity_map+0x49/0xd0
  ? __intel_map_single+0x30/0x140
  ? e1000e_update_rdt_wa.isra.52+0x22/0xb0 [e1000e]
  ? e1000_alloc_rx_buffers+0x233/0x250 [e1000e]
  ? kmem_cache_alloc+0x38/0x1c0
  tcf_classify+0x89/0x140
  __netif_receive_skb_core+0x5ea/0xb70
  ? enqueue_task_fair+0xb6/0x7d0
  ? process_backlog+0x97/0x150
  process_backlog+0x97/0x150
  net_rx_action+0x14b/0x3e0
  __do_softirq+0xde/0x2b4
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  do_softirq.part.18+0x49/0x50
  __local_bh_enable_ip+0x49/0x50
  __dev_queue_xmit+0x4ab/0x8a0
  ? wait_woken+0x80/0x80
  ? packet_sendmsg+0x38f/0x810
  ? __dev_queue_xmit+0x8a0/0x8a0
  packet_sendmsg+0x38f/0x810
  sock_sendmsg+0x36/0x40
  __sys_sendto+0x10e/0x140
  ? do_vfs_ioctl+0xa4/0x630
  ? syscall_trace_enter+0x1df/0x2e0
  ? __audit_syscall_exit+0x22a/0x290
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f5a45cbec93
 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24
 RSP: 002b:00007ffd0ee6d748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 0000000001161010 RCX: 00007f5a45cbec93
 RDX: 0000000000000062 RSI: 0000000001161322 RDI: 0000000000000003
 RBP: 00007ffd0ee6d780 R08: 00007ffd0ee6d760 R09: 0000000000000014
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062
 R13: 0000000001161322 R14: 00007ffd0ee6d760 R15: 0000000000000003
 Modules linked in: act_csum act_gact cls_flower sch_ingress vrf veth act_tunnel_key(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel snd_hda_codec_hdmi snd_hda_codec_realtek kvm snd_hda_codec_generic hp_wmi iTCO_wdt sparse_keymap rfkill mei_wdt iTCO_vendor_support wmi_bmof gpio_ich irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel snd_hda_intel crypto_simd cryptd snd_hda_codec glue_helper snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm pcspkr i2c_i801 snd_timer snd sg lpc_ich soundcore wmi mei_me
  mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod ahci libahci crc32c_intel i915 ixgbe serio_raw libata video dca i2c_algo_bit sfc drm_kms_helper syscopyarea mtd sysfillrect mdio sysimgblt fb_sys_fops drm e1000e i2c_core
 CR2: 0000000000000000
 ---[ end trace 3c9e9d1a77df4026 ]---
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffffa020dea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffffa020d7ccef00 RCX: 0000000000000054
 RDX: 0000000000000000 RSI: ffffa020ca5ae000 RDI: ffffa020d7ccef00
 RBP: ffffa020dea03e60 R08: 0000000000000000 R09: ffffa020dea03c9c
 R10: ffffa020dea03c78 R11: 0000000000000008 R12: ffffa020d3fe4f00
 R13: ffffa020d3fe4f08 R14: 0000000000000001 R15: ffffa020d53ca300
 FS:  00007f5a46942740(0000) GS:ffffa020dea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 0000000104218002 CR4: 00000000001606f0
 Kernel panic - not syncing: Fatal exception in interrupt
 Kernel Offset: 0x26400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: 9c5f69bbd75a ("net/sched: act_csum: don't use spinlock in the fast path")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_csum.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 9470fd7e4350..32d2454c0479 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -7,7 +7,6 @@
 #include <linux/tc_act/tc_csum.h>
 
 struct tcf_csum_params {
-	int action;
 	u32 update_flags;
 	struct rcu_head rcu;
 };
-- 
cgit 


From 38230a3e0e0933bbcf5df6fa469ba0667f667568 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 6 Jul 2018 21:01:06 +0200
Subject: net/sched: act_tunnel_key: fix NULL dereference when 'goto chain' is
 used

the control action in the common member of struct tcf_tunnel_key must be a
valid value, as it can contain the chain index when 'goto chain' is used.
Ensure that the control action can be read as x->tcfa_action, when x is a
pointer to struct tc_action and x->ops->type is TCA_ACT_TUNNEL_KEY, to
prevent the following command:

 # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
 > $tcflags dst_mac $h2mac action tunnel_key unset goto chain 1

from causing a NULL dereference when a matching packet is received:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 80000001097ac067 P4D 80000001097ac067 PUD 103b0a067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 0 PID: 3491 Comm: mausezahn Tainted: G            E     4.18.0-rc2.auguri+ #421
 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001
 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c
 R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800
 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40
 FS:  00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0
 Call Trace:
  <IRQ>
  fl_classify+0x1ad/0x1c0 [cls_flower]
  ? __update_load_avg_se.isra.47+0x1ca/0x1d0
  ? __update_load_avg_se.isra.47+0x1ca/0x1d0
  ? update_load_avg+0x665/0x690
  ? update_load_avg+0x665/0x690
  ? kmem_cache_alloc+0x38/0x1c0
  tcf_classify+0x89/0x140
  __netif_receive_skb_core+0x5ea/0xb70
  ? enqueue_entity+0xd0/0x270
  ? process_backlog+0x97/0x150
  process_backlog+0x97/0x150
  net_rx_action+0x14b/0x3e0
  __do_softirq+0xde/0x2b4
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  do_softirq.part.18+0x49/0x50
  __local_bh_enable_ip+0x49/0x50
  __dev_queue_xmit+0x4ab/0x8a0
  ? wait_woken+0x80/0x80
  ? packet_sendmsg+0x38f/0x810
  ? __dev_queue_xmit+0x8a0/0x8a0
  packet_sendmsg+0x38f/0x810
  sock_sendmsg+0x36/0x40
  __sys_sendto+0x10e/0x140
  ? do_vfs_ioctl+0xa4/0x630
  ? syscall_trace_enter+0x1df/0x2e0
  ? __audit_syscall_exit+0x22a/0x290
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7fd67e18dc93
 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24
 RSP: 002b:00007ffe0189b748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 00000000020ca010 RCX: 00007fd67e18dc93
 RDX: 0000000000000062 RSI: 00000000020ca322 RDI: 0000000000000003
 RBP: 00007ffe0189b780 R08: 00007ffe0189b760 R09: 0000000000000014
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062
 R13: 00000000020ca322 R14: 00007ffe0189b760 R15: 0000000000000003
 Modules linked in: act_tunnel_key act_gact cls_flower sch_ingress vrf veth act_csum(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic kvm_intel kvm irqbypass snd_hda_intel crct10dif_pclmul crc32_pclmul hp_wmi ghash_clmulni_intel pcbc snd_hda_codec aesni_intel sparse_keymap rfkill snd_hda_core snd_hwdep snd_seq crypto_simd iTCO_wdt gpio_ich iTCO_vendor_support wmi_bmof cryptd mei_wdt glue_helper snd_seq_device snd_pcm pcspkr snd_timer snd i2c_i801 lpc_ich sg soundcore wmi mei_me
  mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod sr_mod cdrom i915 video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ahci crc32c_intel libahci serio_raw sfc libata mtd drm ixgbe mdio i2c_core e1000e dca
 CR2: 0000000000000000
 ---[ end trace 1ab8b5b5d4639dfc ]---
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001
 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c
 R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800
 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40
 FS:  00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0
 Kernel panic - not syncing: Fatal exception in interrupt
 Kernel Offset: 0x11400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_tunnel_key.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index efef0b4b1b2b..46b8c7f1c8d5 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -18,7 +18,6 @@
 struct tcf_tunnel_key_params {
 	struct rcu_head		rcu;
 	int			tcft_action;
-	int			action;
 	struct metadata_dst     *tcft_enc_metadata;
 };
 
-- 
cgit 


From 0ea488ff8d23c93da383fcf424825c298b13b1fb Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 5 Jul 2018 08:50:15 -0700
Subject: bpf: sockmap, convert bpf_compute_data_pointers to bpf_*_sk_skb

In commit

  'bpf: bpf_compute_data uses incorrect cb structure' (8108a7751512)

we added the routine bpf_compute_data_end_sk_skb() to compute the
correct data_end values, but this has since been lost. In kernel
v4.14 this was correct and the above patch was applied in it
entirety. Then when v4.14 was merged into v4.15-rc1 net-next tree
we lost the piece that renamed bpf_compute_data_pointers to the
new function bpf_compute_data_end_sk_skb. This was done here,

e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")

When it conflicted with the following rename patch,

6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")

Finally, after a refactor I thought even the function
bpf_compute_data_end_sk_skb() was no longer needed and it was
erroneously removed.

However, we never reverted the sk_skb_convert_ctx_access() usage of
tcp_skb_cb which had been committed and survived the merge conflict.
Here we fix this by adding back the helper and *_data_end_sk_skb()
usage. Using the bpf_skc_data_end mapping is not correct because it
expects a qdisc_skb_cb object but at the sock layer this is not the
case. Even though it happens to work here because we don't overwrite
any data in-use at the socket layer and the cb structure is cleared
later this has potential to create some subtle issues. But, even
more concretely the filter.c access check uses tcp_skb_cb.

And by some act of chance though,

struct bpf_skb_data_end {
        struct qdisc_skb_cb        qdisc_cb;             /*     0    28 */

        /* XXX 4 bytes hole, try to pack */

        void *                     data_meta;            /*    32     8 */
        void *                     data_end;             /*    40     8 */

        /* size: 48, cachelines: 1, members: 3 */
        /* sum members: 44, holes: 1, sum holes: 4 */
        /* last cacheline: 48 bytes */
};

and then tcp_skb_cb,

struct tcp_skb_cb {
	[...]
                struct {
                        __u32      flags;                /*    24     4 */
                        struct sock * sk_redir;          /*    32     8 */
                        void *     data_end;             /*    40     8 */
                } bpf;                                   /*          24 */
        };

So when we use offset_of() to track down the byte offset we get 40 in
either case and everything continues to work. Fix this mess and use
correct structures its unclear how long this might actually work for
until someone moves the structs around.

Reported-by: Martin KaFai Lau <kafai@fb.com>
Fixes: e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
Fixes: 6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 800582b5dd54..af3ec72d5d41 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -828,6 +828,10 @@ struct tcp_skb_cb {
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
 
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
-- 
cgit 


From 03dc7a35fcc83a199121a5156c4a7a976b836682 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:19:14 +0200
Subject: ipv6: xfrm: use 64-bit timestamps

get_seconds() is deprecated because it can overflow on 32-bit
architectures.  For the xfrm_state->lastused member, we treat the data
as a 64-bit number already, so we just need to use the right accessor
that works on both 32-bit and 64-bit machines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a5378613a49c..1350e2cf0749 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -227,7 +227,7 @@ struct xfrm_state {
 	long		saved_tmo;
 
 	/* Last used time */
-	unsigned long		lastused;
+	time64_t		lastused;
 
 	struct page_frag xfrag;
 
-- 
cgit 


From 05296620f6d14dce0030b87e1e57891a770fb65c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:40 -0700
Subject: xdp: factor out common program/flags handling from drivers

Basic operations drivers perform during xdp setup and query can
be moved to helpers in the core.  Encapsulate program and flags
into a structure and add helpers.  Note that the structure is
intended as the "main" program information source in the driver.
Most drivers will additionally place the program pointer in their
fast path or ring structures.

The helpers don't have a huge impact now, but they will
decrease the code duplication when programs can be installed
in HW and driver at the same time.  Encapsulating the basic
operations in helpers will hopefully also reduce the number
of changes to drivers which adopt them.

Helpers could really be static inline, but they depend on
definition of struct netdev_bpf which means they'd have
to be placed in netdevice.h, an already 4500 line header.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 2deea7166a34..fcb033f51d8c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -144,4 +144,17 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
 	return unlikely(xdp->data_meta > xdp->data);
 }
 
+struct xdp_attachment_info {
+	struct bpf_prog *prog;
+	u32 flags;
+};
+
+struct netdev_bpf;
+int xdp_attachment_query(struct xdp_attachment_info *info,
+			 struct netdev_bpf *bpf);
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf);
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+			  struct netdev_bpf *bpf);
+
 #endif /* __LINUX_NET_XDP_H__ */
-- 
cgit 


From 01683a1469995cc7aaf833d6f8b3f1c1d2fc3b92 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 9 Jul 2018 13:29:11 +0300
Subject: net: sched: refactor flower walk to iterate over idr

Extend struct tcf_walker with additional 'cookie' field. It is intended to
be used by classifier walk implementations to continue iteration directly
from particular filter, instead of iterating 'skip' number of times.

Change flower walk implementation to save filter handle in 'cookie'. Each
time flower walk is called, it looks up filter with saved handle directly
with idr, instead of iterating over filter linked list 'skip' number of
times. This change improves complexity of dumping flower classifier from
quadratic to linearithmic. (assuming idr lookup has logarithmic complexity)

Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reported-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2081e4219f81..e4252a176eec 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -13,6 +13,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
+	unsigned long cookie;
 	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
-- 
cgit 


From a69258f7aa2623e0930212f09c586fd06674ad79 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jul 2018 06:04:53 -0700
Subject: tcp: remove DELAYED ACK events in DCTCP

After fixing the way DCTCP tracking delayed ACKs, the delayed-ACK
related callbacks are no longer needed

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index af3ec72d5d41..3482d13d655b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -912,8 +912,6 @@ enum tcp_ca_event {
 	CA_EVENT_LOSS,		/* loss timeout */
 	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
 	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
-	CA_EVENT_DELAYED_ACK,	/* Delayed ack is sent */
-	CA_EVENT_NON_DELAYED_ACK,
 };
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
-- 
cgit 


From d80a1b9d186057ddb0d384ba601cf2b7d214539c Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:39 +0300
Subject: tls: Refactor tls_offload variable names

For symmetry, we rename tls_offload_context to
tls_offload_context_tx before we add tls_offload_context_rx.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 70c273777fe9..5dcd808236a7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -128,7 +128,7 @@ struct tls_record_info {
 	skb_frag_t frags[MAX_SKB_FRAGS];
 };
 
-struct tls_offload_context {
+struct tls_offload_context_tx {
 	struct crypto_aead *aead_send;
 	spinlock_t lock;	/* protects records list */
 	struct list_head records_list;
@@ -147,8 +147,8 @@ struct tls_offload_context {
 #define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
 };
 
-#define TLS_OFFLOAD_CONTEXT_SIZE                                               \
-	(ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) +           \
+#define TLS_OFFLOAD_CONTEXT_SIZE_TX                                            \
+	(ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) +        \
 	 TLS_DRIVER_STATE_SIZE)
 
 enum {
@@ -239,7 +239,7 @@ void tls_device_sk_destruct(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn);
 
 static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
@@ -380,10 +380,10 @@ static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
 	return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
-static inline struct tls_offload_context *tls_offload_ctx(
-		const struct tls_context *tls_ctx)
+static inline struct tls_offload_context_tx *
+tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 {
-	return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
+	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -396,7 +396,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct sk_buff *skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-			 struct tls_offload_context *offload_ctx,
+			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
 #endif /* _TLS_OFFLOAD_H */
-- 
cgit 


From dafb67f3bb4a58a45fe92c1e362ea6429831688a Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:40 +0300
Subject: tls: Split decrypt_skb to two functions

Previously, decrypt_skb also updated the TLS context.
Now, decrypt_skb only decrypts the payload using the current context,
while decrypt_skb_update also updates the state.

Later, in the tls_device Rx flow, we will use decrypt_skb directly.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 5dcd808236a7..49b89221db43 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -390,6 +390,8 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		struct scatterlist *sgout);
 
 struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct net_device *dev,
-- 
cgit 


From 39f56e1a78d647316db330c3b6f4c5637a895e3b Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:41 +0300
Subject: tls: Split tls_sw_release_resources_rx

This patch splits tls_sw_release_resources_rx into two functions one
which releases all inner software tls structures and another that also
frees the containing structure.

In TLS_DEVICE we will need to release the software structures without
freeeing the containing structure, which contains other information.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 49b89221db43..7a485de25646 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -223,6 +223,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
+void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-- 
cgit 


From 4799ac81e52a72a6404827bf2738337bb581a174 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:43 +0300
Subject: tls: Add rx inline crypto offload

This patch completes the generic infrastructure to offload TLS crypto to a
network device. It enables the kernel to skip decryption and
authentication of some skbs marked as decrypted by the NIC. In the fast
path, all packets received are decrypted by the NIC and the performance
is comparable to plain TCP.

This infrastructure doesn't require a TCP offload engine. Instead, the
NIC only decrypts packets that contain the expected TCP sequence number.
Out-Of-Order TCP packets are provided unmodified. As a result, at the
worst case a received TLS record consists of both plaintext and ciphertext
packets. These partially decrypted records must be reencrypted,
only to be decrypted.

The notable differences between SW KTLS Rx and this offload are as
follows:
1. Partial decryption - Software must handle the case of a TLS record
that was only partially decrypted by HW. This can happen due to packet
reordering.
2. Resynchronization - tls_read_size calls the device driver to
resynchronize HW after HW lost track of TLS record framing in
the TCP stream.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 7a485de25646..d8b3b6578c01 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -83,6 +83,16 @@ struct tls_device {
 	void (*unhash)(struct tls_device *device, struct sock *sk);
 };
 
+enum {
+	TLS_BASE,
+	TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+	TLS_HW,
+#endif
+	TLS_HW_RECORD,
+	TLS_NUM_CONFIG,
+};
+
 struct tls_sw_context_tx {
 	struct crypto_aead *aead_send;
 	struct crypto_wait async_wait;
@@ -197,6 +207,7 @@ struct tls_context {
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
+	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
 	int  (*setsockopt)(struct sock *sk, int level,
@@ -209,13 +220,27 @@ struct tls_context {
 	void (*unhash)(struct sock *sk);
 };
 
+struct tls_offload_context_rx {
+	/* sw must be the first member of tls_offload_context_rx */
+	struct tls_sw_context_rx sw;
+	atomic64_t resync_req;
+	u8 driver_state[];
+	/* The TLS layer reserves room for driver specific state
+	 * Currently the belief is that there is not enough
+	 * driver specific state to justify another layer of indirection
+	 */
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE_RX					\
+	(ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \
+	 TLS_DRIVER_STATE_SIZE)
+
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
 		int __user *optlen);
 int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
-
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
@@ -290,11 +315,19 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
+struct sk_buff *
+tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
+		      struct sk_buff *skb);
+
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
-	return sk_fullsock(sk) &&
-	       /* matches smp_store_release in tls_set_device_offload */
-	       smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+	return sk_fullsock(sk) &
+	       (smp_load_acquire(&sk->sk_validate_xmit_skb) ==
+	       &tls_validate_xmit_skb);
+#else
+	return false;
+#endif
 }
 
 static inline void tls_err_abort(struct sock *sk, int err)
@@ -387,10 +420,27 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline struct tls_offload_context_rx *
+tls_offload_ctx_rx(const struct tls_context *tls_ctx)
+{
+	return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+/* The TLS context is valid until sk_destruct is called */
+static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+	atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1));
+}
+
+
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 
@@ -402,4 +452,9 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
+
+void tls_device_offload_cleanup_rx(struct sock *sk);
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn);
+
 #endif /* _TLS_OFFLOAD_H */
-- 
cgit 


From f286586df68e7733a8e651098401f139dc2e17f4 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Mon, 18 Jun 2018 15:12:52 +0200
Subject: netfilter: nft_tproxy: Move nf_tproxy_assign_sock() to nf_tproxy.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function is also necessary to implement nft tproxy support

Fixes: 45ca4e0cf273 ("netfilter: Libify xt_TPROXY")
Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 9754a50ecde9..d5a80888cbe4 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
 	return false;
 }
 
+/* assign a socket to the skb -- consumes sk */
+static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_edemux;
+}
+
 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
 
 /**
-- 
cgit 


From 60e3be94e6a1c5162a0763c9aafb5190b2b1fdce Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:55:32 +0200
Subject: openvswitch: use nf_ct_get_tuplepr, invert_tuplepr

These versions deal with the l3proto/l4proto details internally.
It removes only caller of nf_ct_get_tuple, so make it static.

After this, l3proto->get_l4proto() can be removed in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 9b5e7634713e..90df45022c51 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -40,13 +40,6 @@ void nf_conntrack_cleanup_start(void);
 void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
-bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
-		     unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
-		     struct net *net,
-		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_conntrack_l3proto *l3proto,
-		     const struct nf_conntrack_l4proto *l4proto);
-
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
 			const struct nf_conntrack_l3proto *l3proto,
-- 
cgit 


From f957be9d349a3800940f823b16e12b0405cc305b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:44 +0200
Subject: netfilter: conntrack: remove ctnetlink callbacks from l3 protocol
 trackers

handle everything from ctnetlink directly.

After all these years we still only support ipv4 and ipv6, so it
seems reasonable to remove l3 protocol tracker support and instead
handle ipv4/ipv6 from a common, always builtin inet tracker.

Step 1: Get rid of all the l3proto->func() calls.

Start with ctnetlink, then move on to packet-path ones.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    | 6 ++----
 include/net/netfilter/nf_conntrack_l3proto.h | 8 --------
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 90df45022c51..d454a53ba646 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -68,10 +68,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	return ret;
 }
 
-void
-print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
-            const struct nf_conntrack_l4proto *proto);
+void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_l4proto *proto);
 
 #define CONNTRACK_LOCKS 1024
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d5808f3e2715..d07b5216a925 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -46,14 +46,6 @@ struct nf_conntrack_l3proto {
 	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
 			   unsigned int *dataoff, u_int8_t *protonum);
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	int (*tuple_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_conntrack_tuple *t);
-	int (*nlattr_to_tuple)(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
-	const struct nla_policy *nla_policy;
-#endif
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
-- 
cgit 


From 47a91b14de62e35d1466820cbb4c024b6c02dff1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:45 +0200
Subject: netfilter: conntrack: remove pkt_to_tuple indirection from l3
 protocol trackers

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d07b5216a925..ece231450f30 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Try to fill in the third arg: nhoff is offset of l3 proto
-         * hdr.  Return true if possible.
-	 */
-	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
-			     struct nf_conntrack_tuple *tuple);
-
 	/*
 	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
 	 * Some packets can't be inverted: return 0 in that case.
-- 
cgit 


From d1b6fe94941f43e4743d5fea953d16b0a001c2c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:46 +0200
Subject: netfilter: conntrack: remove invert_tuple indirection from l3
 protocol trackers

Its simpler to just handle it directly in nf_ct_invert_tuple().
Also gets rid of need to pass l3proto pointer to resolve_conntrack().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    | 1 -
 include/net/netfilter/nf_conntrack_l3proto.h | 7 -------
 2 files changed, 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index d454a53ba646..35461b2d3462 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -42,7 +42,6 @@ void nf_conntrack_cleanup_end(void);
 
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
-			const struct nf_conntrack_l3proto *l3proto,
 			const struct nf_conntrack_l4proto *l4proto);
 
 /* Find a connection corresponding to a tuple. */
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index ece231450f30..164641c743a5 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
-	 */
-	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
-			     const struct nf_conntrack_tuple *orig);
-
 	/*
 	 * Called before tracking. 
 	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
-- 
cgit 


From 6816d931cab009024b68c11c4cf752f8bf9a1e32 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:47 +0200
Subject: netfilter: conntrack: remove get_l4proto indirection from l3 protocol
 trackers

Handle it in the core instead.

ipv6_skip_exthdr() is built-in even if ipv6 is a module, i.e. this
doesn't create an ipv6 dependency.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 164641c743a5..5f160375c93a 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,14 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Called before tracking. 
-	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
-	 *	*protonum: protocol number
-	 */
-	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
-			   unsigned int *dataoff, u_int8_t *protonum);
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
-- 
cgit 


From 8b3892ea8718920d29432328fe9544d89a429614 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:48 +0200
Subject: netfilter: conntrack: avoid calls to l4proto invert_tuple

Handle the common cases (tcp, udp, etc). in the core and only
do the indirect call for the protocols that need it (GRE for instance).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index a7220eef9aee..6a55e337a161 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
 			     struct net *net, struct nf_conntrack_tuple *tuple);
 
 	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
+	 * Only used by icmp, most protocols use a generic version.
 	 */
 	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
 			     const struct nf_conntrack_tuple *orig);
-- 
cgit 


From c779e849608a875448f6ffc2a5c2a15523bdcd00 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:50 +0200
Subject: netfilter: conntrack: remove get_timeout() indirection

Not needed, we can have the l4trackers fetch it themselvs.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  8 ++------
 include/net/netfilter/nf_conntrack_timeout.h | 18 ++++--------------
 2 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6a55e337a161..c7a0075d96df 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
 	int (*packet)(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts);
+		      enum ip_conntrack_info ctinfo);
 
 	/* Called when a new connection for this protocol found;
 	 * returns TRUE if it's OK.  If so, packet() called next. */
 	bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts);
+		    unsigned int dataoff);
 
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
@@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
-	/* Return the array of timeouts for this protocol. */
-	unsigned int *(*get_timeouts)(struct net *net);
-
 	/* convert protoinfo to nfnetink attributes */
 	int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
 			 struct nf_conn *ct);
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 9468ab4ad12d..80ceb3d0291d 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 #endif
 };
 
-static inline unsigned int *
-nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-		     const struct nf_conntrack_l4proto *l4proto)
+static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 {
+	unsigned int *timeouts = NULL;
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	struct nf_conn_timeout *timeout_ext;
-	unsigned int *timeouts;
 
 	timeout_ext = nf_ct_timeout_find(ct);
-	if (timeout_ext) {
+	if (timeout_ext)
 		timeouts = nf_ct_timeout_data(timeout_ext);
-		if (unlikely(!timeouts))
-			timeouts = l4proto->get_timeouts(net);
-	} else {
-		timeouts = l4proto->get_timeouts(net);
-	}
-
-	return timeouts;
-#else
-	return l4proto->get_timeouts(net);
 #endif
+	return timeouts;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-- 
cgit 


From c7ea20c9da5b94e400c8dcc0adb99411f2e430a6 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Jul 2018 22:41:27 +0800
Subject: ipv6/mcast: init as INCLUDE when join SSM INCLUDE group

This an IPv6 version patch of "ipv4/igmp: init group mode as INCLUDE when
join source group". From RFC3810, part 6.1:

   If no per-interface state existed for that
   multicast address before the change (i.e., the change consisted of
   creating a new per-interface record), or if no state exists after the
   change (i.e., the change consisted of deleting a per-interface
   record), then the "non-existent" state is considered to have an
   INCLUDE filter mode and an empty source list.

Which means a new multicast group should start with state IN(). Currently,
for MLDv2 SSM JOIN_SOURCE_GROUP mode, we first call ipv6_sock_mc_join(),
then ip6_mc_source(), which will trigger a TO_IN() message instead of
ALLOW().

The issue was exposed by commit a052517a8ff65 ("net/multicast: should not
send source list records when have filter mode change"). Before this change,
we sent both ALLOW(A) and TO_IN(A). Now, we only send TO_IN(A).

Fix it by adding a new parameter to init group mode. Also add some wrapper
functions to avoid changing too much code.

v1 -> v2:
In the first version I only cleared the group change record. But this is not
enough. Because when a new group join, it will init as EXCLUDE and trigger
a filter mode change in ip/ip6_mc_add_src(), which will clear all source
addresses sf_crcount. This will prevent early joined address sending state
change records if multi source addressed joined at the same time.

In v2 patch, I fixed it by directly initializing the mode to INCLUDE for SSM
JOIN_SOURCE_GROUP. I also split the original patch into two separated patches
for IPv4 and IPv6.

There is also a difference between v4 and v6 version. For IPv6, when the
interface goes down and up, we will send correct state change record with
unspecified IPv6 address (::) with function ipv6_mc_up(). But after DAD is
completed, we resend the change record TO_IN() in mld_send_initial_cr().
Fix it by sending ALLOW() for INCLUDE mode in mld_send_initial_cr().

Fixes: a052517a8ff65 ("net/multicast: should not send source list records when have filter mode change")
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d02881e4ad1f..7528632bcf2a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1100,6 +1100,8 @@ void ipv6_sysctl_unregister(void);
 
 int ipv6_sock_mc_join(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+			  const struct in6_addr *addr, unsigned int mode);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
 #endif /* _NET_IPV6_H */
-- 
cgit 


From a0ae2562c6c4b2721d9fddba63b7286c13517d9f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:51 +0200
Subject: netfilter: conntrack: remove l3proto abstraction

This unifies ipv4 and ipv6 protocol trackers and removes the l3proto
abstraction.

This gets rid of all l3proto indirect calls and the need to do
a lookup on the function to call for l3 demux.

It increases module size by only a small amount (12kbyte), so this reduces
size because nf_conntrack.ko is useless without either nf_conntrack_ipv4
or nf_conntrack_ipv6 module.

before:
   text    data     bss     dec     hex filename
   7357    1088       0    8445    20fd nf_conntrack_ipv4.ko
   7405    1084       4    8493    212d nf_conntrack_ipv6.ko
  72614   13689     236   86539   1520b nf_conntrack.ko
 19K nf_conntrack_ipv4.ko
 19K nf_conntrack_ipv6.ko
179K nf_conntrack.ko

after:
   text    data     bss     dec     hex filename
  79277   13937     236   93450   16d0a nf_conntrack.ko
  191K nf_conntrack.ko

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 --
 include/net/netfilter/nf_conntrack.h           |  5 +++
 include/net/netfilter/nf_conntrack_core.h      |  1 -
 include/net/netfilter/nf_conntrack_l3proto.h   | 54 --------------------------
 include/net/netfilter/nf_conntrack_l4proto.h   |  4 --
 5 files changed, 5 insertions(+), 62 deletions(-)
 delete mode 100644 include/net/netfilter/nf_conntrack_l3proto.h

(limited to 'include/net')

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 73f825732326..c84b51682f08 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -10,9 +10,6 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-
-const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 062dc19b5840..a2b0ed025908 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
 	/* insert expect proto private data here */
 };
 
+struct nf_conntrack_net {
+	unsigned int users4;
+	unsigned int users6;
+};
+
 #include <linux/types.h>
 #include <linux/skbuff.h>
 
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 35461b2d3462..2a3e0974a6af 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -14,7 +14,6 @@
 #define _NF_CONNTRACK_CORE_H
 
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
deleted file mode 100644
index 5f160375c93a..000000000000
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * Header for use in defining a given L3 protocol for connection tracking.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI	<yasuyuki.kozakai@toshiba.co.jp>
- *
- * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
- */
-
-#ifndef _NF_CONNTRACK_L3PROTO_H
-#define _NF_CONNTRACK_L3PROTO_H
-#include <linux/netlink.h>
-#include <net/netlink.h>
-#include <linux/seq_file.h>
-#include <net/netfilter/nf_conntrack.h>
-
-struct nf_conntrack_l3proto {
-	/* L3 Protocol Family number. ex) PF_INET */
-	u_int16_t l3proto;
-
-	/* size of tuple nlattr, fills a hole */
-	u16 nla_size;
-
-	/* Called when netns wants to use connection tracking */
-	int (*net_ns_get)(struct net *);
-	void (*net_ns_put)(struct net *);
-
-	/* Module (if any) which this is connected to. */
-	struct module *me;
-};
-
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-
-/* Protocol global registration. */
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-
-const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-
-/* Existing built-in protocols */
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
-
-static inline struct nf_conntrack_l3proto *
-__nf_ct_l3proto_find(u_int16_t l3proto)
-{
-	if (unlikely(l3proto >= NFPROTO_NUMPROTO))
-		return &nf_conntrack_l3proto_generic;
-	return rcu_dereference(nf_ct_l3protos[l3proto]);
-}
-
-#endif /*_NF_CONNTRACK_L3PROTO_H*/
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index c7a0075d96df..6068c6da3eac 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -130,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
-			   unsigned int num_proto);
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
-			      unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
-- 
cgit 


From 26b2f552525cf98fad08515bd6faa427f2f22038 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 13 Jul 2018 01:38:08 +0900
Subject: netfilter: nf_tables: fix jumpstack depth validation

The level of struct nft_ctx is updated by nf_tables_check_loops().  That
is used to validate jumpstack depth. But jumpstack validation routine
doesn't update and validate recursively.  So, in some cases, chain depth
can be bigger than the NFT_JUMP_STACK_SIZE.

After this patch, The jumpstack validation routine is located in the
nft_chain_validate(). When new rules or new set elements are added, the
nft_table_validate() is called by the nf_tables_newrule and the
nf_tables_newsetelem. The nft_table_validate() calls the
nft_chain_validate() that visit all their children chains recursively.
So it can update depth of chain certainly.

Reproducer:
   %cat ./test.sh
   #!/bin/bash
   nft add table ip filter
   nft add chain ip filter input { type filter hook input priority 0\; }
   for ((i=0;i<20;i++)); do
	nft add chain ip filter a$i
   done

   nft add rule ip filter input jump a1

   for ((i=0;i<10;i++)); do
	nft add rule ip filter a$i jump a$((i+1))
   done

   for ((i=11;i<19;i++)); do
	nft add rule ip filter a$i jump a$((i+1))
   done

   nft add rule ip filter a10 jump a11

Result:
[  253.931782] WARNING: CPU: 1 PID: 0 at net/netfilter/nf_tables_core.c:186 nft_do_chain+0xacc/0xdf0 [nf_tables]
[  253.931915] Modules linked in: nf_tables nfnetlink ip_tables x_tables
[  253.932153] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.18.0-rc3+ #48
[  253.932153] RIP: 0010:nft_do_chain+0xacc/0xdf0 [nf_tables]
[  253.932153] Code: 83 f8 fb 0f 84 c7 00 00 00 e9 d0 00 00 00 83 f8 fd 74 0e 83 f8 ff 0f 84 b4 00 00 00 e9 bd 00 00 00 83 bd 64 fd ff ff 0f 76 09 <0f> 0b 31 c0 e9 bc 02 00 00 44 8b ad 64 fd
[  253.933807] RSP: 0018:ffff88011b807570 EFLAGS: 00010212
[  253.933807] RAX: 00000000fffffffd RBX: ffff88011b807660 RCX: 0000000000000000
[  253.933807] RDX: 0000000000000010 RSI: ffff880112b39d78 RDI: ffff88011b807670
[  253.933807] RBP: ffff88011b807850 R08: ffffed0023700ece R09: ffffed0023700ecd
[  253.933807] R10: ffff88011b80766f R11: ffffed0023700ece R12: ffff88011b807898
[  253.933807] R13: ffff880112b39d80 R14: ffff880112b39d60 R15: dffffc0000000000
[  253.933807] FS:  0000000000000000(0000) GS:ffff88011b800000(0000) knlGS:0000000000000000
[  253.933807] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  253.933807] CR2: 00000000014f1008 CR3: 000000006b216000 CR4: 00000000001006e0
[  253.933807] Call Trace:
[  253.933807]  <IRQ>
[  253.933807]  ? sched_clock_cpu+0x132/0x170
[  253.933807]  ? __nft_trace_packet+0x180/0x180 [nf_tables]
[  253.933807]  ? sched_clock_cpu+0x132/0x170
[  253.933807]  ? debug_show_all_locks+0x290/0x290
[  253.933807]  ? __lock_acquire+0x4835/0x4af0
[  253.933807]  ? inet_ehash_locks_alloc+0x1a0/0x1a0
[  253.933807]  ? unwind_next_frame+0x159e/0x1840
[  253.933807]  ? __read_once_size_nocheck.constprop.4+0x5/0x10
[  253.933807]  ? nft_do_chain_ipv4+0x197/0x1e0 [nf_tables]
[  253.933807]  ? nft_do_chain+0x5/0xdf0 [nf_tables]
[  253.933807]  nft_do_chain_ipv4+0x197/0x1e0 [nf_tables]
[  253.933807]  ? nft_do_chain_arp+0xb0/0xb0 [nf_tables]
[  253.933807]  ? __lock_is_held+0x9d/0x130
[  253.933807]  nf_hook_slow+0xc4/0x150
[  253.933807]  ip_local_deliver+0x28b/0x380
[  253.933807]  ? ip_call_ra_chain+0x3e0/0x3e0
[  253.933807]  ? ip_rcv_finish+0x1610/0x1610
[  253.933807]  ip_rcv+0xbcc/0xcc0
[  253.933807]  ? debug_show_all_locks+0x290/0x290
[  253.933807]  ? ip_local_deliver+0x380/0x380
[  253.933807]  ? __lock_is_held+0x9d/0x130
[  253.933807]  ? ip_local_deliver+0x380/0x380
[  253.933807]  __netif_receive_skb_core+0x1c9c/0x2240

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 08c005ce56e9..4e82a4c49912 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -150,6 +150,7 @@ static inline void nft_data_debug(const struct nft_data *data)
  *	@portid: netlink portID of the original message
  *	@seq: netlink sequence number
  *	@family: protocol family
+ *	@level: depth of the chains
  *	@report: notify via unicast netlink message
  */
 struct nft_ctx {
@@ -160,6 +161,7 @@ struct nft_ctx {
 	u32				portid;
 	u32				seq;
 	u8				family;
+	u8				level;
 	bool				report;
 };
 
@@ -865,7 +867,6 @@ enum nft_chain_flags {
  *	@table: table that this chain belongs to
  *	@handle: chain handle
  *	@use: number of jump references to this chain
- *	@level: length of longest path to this chain
  *	@flags: bitmask of enum nft_chain_flags
  *	@name: name of the chain
  */
@@ -878,7 +879,6 @@ struct nft_chain {
 	struct nft_table		*table;
 	u64				handle;
 	u32				use;
-	u16				level;
 	u8				flags:6,
 					genmask:2;
 	char				*name;
-- 
cgit 


From cb2b36f5a97df76f547fcc4ab444a02522fb6c96 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:40 -0700
Subject: netfilter: nf_conncount: Switch to plain list

Original patch is from Florian Westphal.

This patch switches from hlist to plain list to store the list of
connections with the same filtering key in nf_conncount. With the
plain list, we can insert new connections at the tail, so over time
the beginning of list holds long-running connections and those are
expired, while the newly creates ones are at the end.

Later on, we could probably move checked ones to the end of the list,
so the next run has higher chance to reclaim stale entries in the front.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 3a188a0923a3..e4884e0e4f69 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -1,8 +1,15 @@
 #ifndef _NF_CONNTRACK_COUNT_H
 #define _NF_CONNTRACK_COUNT_H
 
+#include <linux/list.h>
+
 struct nf_conncount_data;
 
+struct nf_conncount_list {
+	struct list_head head;	/* connections with the same filtering key */
+	unsigned int count;	/* length of list */
+};
+
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
 					    unsigned int keylen);
 void nf_conncount_destroy(struct net *net, unsigned int family,
@@ -14,15 +21,17 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 				 const struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_zone *zone,
 				 bool *addit);
 
-bool nf_conncount_add(struct hlist_head *head,
+void nf_conncount_list_init(struct nf_conncount_list *list);
+
+bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
-void nf_conncount_cache_free(struct hlist_head *hhead);
+void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
-- 
cgit 


From 976afca1ceba53df6f4a543014e15d1c7a962571 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:41 -0700
Subject: netfilter: nf_conncount: Early exit in nf_conncount_lookup() and
 cleanup

This patch is originally from Florian Westphal.

This patch does the following three tasks.

It applies the same early exit technique for nf_conncount_lookup().

Since now we keep the number of connections in 'struct nf_conncount_list',
we no longer need to return the count in nf_conncount_lookup().

Moreover, we expose the garbage collection function nf_conncount_gc_list()
for nft_connlimit.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index e4884e0e4f69..dbec17f674b7 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -21,10 +21,10 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
-				 const struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_zone *zone,
-				 bool *addit);
+void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+			 const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conntrack_zone *zone,
+			 bool *addit);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
@@ -32,6 +32,9 @@ bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
+void nf_conncount_gc_list(struct net *net,
+			  struct nf_conncount_list *list);
+
 void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
-- 
cgit 


From 5c789e131cbb997a528451564ea4613e812fc718 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:44 -0700
Subject: netfilter: nf_conncount: Add list lock and gc worker, and RCU for
 init tree search

This patch is originally from Florian Westphal.

This patch does the following 3 main tasks.

1) Add list lock to 'struct nf_conncount_list' so that we can
alter the lists containing the individual connections without holding the
main tree lock.  It would be useful when we only need to add/remove to/from
a list without allocate/remove a node in the tree.  With this change, we
update nft_connlimit accordingly since we longer need to maintain
a list lock in nft_connlimit now.

2) Use RCU for the initial tree search to improve tree look up performance.

3) Add a garbage collection worker. This worker is schedule when there
are excessive tree node that needed to be recycled.

Moreover,the rbnode reclaim logic is moved from search tree to insert tree
to avoid race condition.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index dbec17f674b7..4b2b2baf8ab4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,9 +5,17 @@
 
 struct nf_conncount_data;
 
+enum nf_conncount_list_add {
+	NF_CONNCOUNT_ADDED, 	/* list add was ok */
+	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
+	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
+};
+
 struct nf_conncount_list {
+	spinlock_t list_lock;
 	struct list_head head;	/* connections with the same filtering key */
 	unsigned int count;	/* length of list */
+	bool dead;
 };
 
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
@@ -28,11 +36,12 @@ void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
-bool nf_conncount_add(struct nf_conncount_list *list,
-		      const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conntrack_zone *zone);
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+		 const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_zone *zone);
 
-void nf_conncount_gc_list(struct net *net,
+bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list);
 
 void nf_conncount_cache_free(struct nf_conncount_list *list);
-- 
cgit 


From ec1b28ca9674def4a158808a6493bdb87b993d81 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:52 +0300
Subject: ipvs: provide just conn to ip_vs_state_name

In preparation for followup patches, provide just the cp
ptr to ip_vs_state_name.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a0bec23c6d5e..4d76abcf1c41 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1221,7 +1221,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 				  struct ip_vs_dest *dest, __u32 fwmark);
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
-const char *ip_vs_state_name(__u16 proto, int state);
+const char *ip_vs_state_name(const struct ip_vs_conn *cp);
 
 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
-- 
cgit 


From 275411430f892407b885be1de2548b2e632892c3 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:53 +0300
Subject: ipvs: add assured state for conn templates

cp->state was not used for templates. Add support for state bits
and for the first "assured" bit which indicates that some
connection controlled by this template was established or assured
by the real server. In a followup patch we will use it to drop
templates under SYN attack.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4d76abcf1c41..a0d2e0bb9a94 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
 	IP_VS_SCTP_S_LAST
 };
 
+/* Connection templates use bits from state */
+#define IP_VS_CTPL_S_NONE		0x0000
+#define IP_VS_CTPL_S_ASSURED		0x0001
+#define IP_VS_CTPL_S_LAST		0x0002
+
 /* Delta sequence info structure
  * Each ip_vs_conn has 2 (output AND input seq. changes).
  * Only used in the VS/NAT.
@@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }
 
+/* Mark our template as assured */
+static inline void
+ip_vs_control_assure_ct(struct ip_vs_conn *cp)
+{
+	struct ip_vs_conn *ct = cp->control;
+
+	if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
+	    (ct->flags & IP_VS_CONN_F_TEMPLATE))
+		ct->state |= IP_VS_CTPL_S_ASSURED;
+}
+
 /* IPVS netns init & cleanup functions */
 int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
 int ip_vs_control_net_init(struct netns_ipvs *ipvs);
-- 
cgit 


From 440534d3c56be04abfb26850ee882d19d223557a Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 9 Jul 2018 18:06:33 +0800
Subject: netfilter: Remove useless param helper of nf_ct_helper_ext_add

The param helper of nf_ct_helper_ext_add is useless now, then remove
it now.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 32c2a94a219d..2492120b8097 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
 				     unsigned int);
 
-struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
-					  struct nf_conntrack_helper *helper,
-					  gfp_t gfp);
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 			      gfp_t flags);
-- 
cgit 


From f102d66b335a417d4848da9441f585695a838934 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jul 2018 13:45:14 +0200
Subject: netfilter: nf_tables: use dedicated mutex to guard transactions

Continue to use nftnl subsys mutex to protect (un)registration of hook types,
expressions and so on, but force batch operations to do their own
locking.

This allows distinct net namespaces to perform transactions in parallel.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/nftables.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 94767ea3a490..286fd960896f 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
 struct netns_nftables {
 	struct list_head	tables;
 	struct list_head	commit_list;
+	struct mutex		commit_mutex;
 	unsigned int		base_seq;
 	u8			gencursor;
 	u8			validate_state;
-- 
cgit 


From 70b095c84326640eeacfd69a411db8fc36e8ab1a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 14 Jul 2018 01:14:01 +0200
Subject: ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module

IPV6=m
DEFRAG_IPV6=m
CONNTRACK=y yields:

net/netfilter/nf_conntrack_proto.o: In function `nf_ct_netns_do_get':
net/netfilter/nf_conntrack_proto.c:802: undefined reference to `nf_defrag_ipv6_enable'
net/netfilter/nf_conntrack_proto.o:(.rodata+0x640): undefined reference to `nf_conntrack_l4proto_icmpv6'

Setting DEFRAG_IPV6=y causes undefined references to ip6_rhash_params
ip6_frag_init and ip6_expire_frag_queue so it would be needed to force
IPV6=y too.

This patch gets rid of the 'followup linker error' by removing
the dependency of ipv6.ko symbols from netfilter ipv6 defrag.

Shared code is placed into a header, then used from both.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ipv6.h      |  28 -------------
 include/net/ipv6_frag.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 28 deletions(-)
 create mode 100644 include/net/ipv6_frag.h

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aa6fd11a887c..3720958cd4e1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -581,34 +581,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
 }
 #endif
 
-struct inet_frag_queue;
-
-enum ip6_defrag_users {
-	IP6_DEFRAG_LOCAL_DELIVER,
-	IP6_DEFRAG_CONNTRACK_IN,
-	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_OUT,
-	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
-};
-
-void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-extern const struct rhashtable_params ip6_rhash_params;
-
-/*
- *	Equivalent of ipv4 struct ip
- */
-struct frag_queue {
-	struct inet_frag_queue	q;
-
-	int			iif;
-	__u16			nhoffset;
-	u8			ecn;
-};
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
-
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
new file mode 100644
index 000000000000..6ced1e6899b6
--- /dev/null
+++ b/include/net/ipv6_frag.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IPV6_FRAG_H
+#define _IPV6_FRAG_H
+#include <linux/kernel.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+enum ip6_defrag_users {
+	IP6_DEFRAG_LOCAL_DELIVER,
+	IP6_DEFRAG_CONNTRACK_IN,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_OUT,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+};
+
+/*
+ *	Equivalent of ipv4 struct ip
+ */
+struct frag_queue {
+	struct inet_frag_queue	q;
+
+	int			iif;
+	__u16			nhoffset;
+	u8			ecn;
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
+{
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	const struct frag_v6_compare_key *key = a;
+
+	q->key.v6 = *key;
+	fq->ecn = 0;
+}
+
+static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key.v6,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline int
+ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_v6_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static inline void
+ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+{
+	struct net_device *dev = NULL;
+	struct sk_buff *head;
+
+	rcu_read_lock();
+	spin_lock(&fq->q.lock);
+
+	if (fq->q.flags & INET_FRAG_COMPLETE)
+		goto out;
+
+	inet_frag_kill(&fq->q);
+
+	dev = dev_get_by_index_rcu(net, fq->iif);
+	if (!dev)
+		goto out;
+
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+	/* Don't send error if the first segment did not arrive. */
+	head = fq->q.fragments;
+	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+		goto out;
+
+	head->dev = dev;
+	skb_get(head);
+	spin_unlock(&fq->q.lock);
+
+	icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+	kfree_skb(head);
+	goto out_rcu_unlock;
+
+out:
+	spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+	rcu_read_unlock();
+	inet_frag_put(&fq->q);
+}
+#endif
+#endif
-- 
cgit 


From 0015b80abccecca82622d9e9d48eb210572a0c3b Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Mon, 16 Jul 2018 21:10:34 -0700
Subject: net: dsa: Remove VLA usage

We avoid 2 VLAs by using a pre-allocated field in dsa_switch. We also
try to avoid dynamic allocation whenever possible (when using fewer than
bits-per-long ports, which is the common case).

Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
[kees: tweak commit subject and message slightly]
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fdbd6082945d..461e8a7661b7 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -259,6 +259,9 @@ struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
+	unsigned long		*bitmap;
+	unsigned long		_bitmap;
+
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
-- 
cgit 


From 169dc027fb02492ea37a0575db6a658cf922b854 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 17:12:39 +0100
Subject: ipv6: fix useless rol32 call on hash

The rol32 call is currently rotating hash but the rol'd value is
being discarded. I believe the current code is incorrect and hash
should be assigned the rotated value returned from rol32.

Thanks to David Lebrun for spotting this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7528632bcf2a..8f73be494503 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -823,7 +823,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 	 * to minimize possbility that any useful information to an
 	 * attacker is leaked. Only lower 20 bits are relevant.
 	 */
-	rol32(hash, 16);
+	hash = rol32(hash, 16);
 
 	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 
-- 
cgit 


From 5544adb9707fda5d54494c37940701894c16b9a0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Jul 2018 19:27:17 +0300
Subject: flow_dissector: Dissect tos and ttl from the tunnel info

Add dissection of the tos and ttl from the ip tunnel headers
fields in case a match is needed on them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c64406717eee..2a17f041f7a1 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -207,7 +207,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
-
+	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
-- 
cgit 


From bc56b33404599edc412b91933d74b36873e8ea25 Mon Sep 17 00:00:00 2001
From: Benedict Wong <benedictwong@google.com>
Date: Thu, 19 Jul 2018 10:50:44 -0700
Subject: xfrm: Remove xfrmi interface ID from flowi

In order to remove performance impact of having the extra u32 in every
single flowi, this change removes the flowi_xfrm struct, prefering to
take the if_id as a method parameter where needed.

In the inbound direction, if_id is only needed during the
__xfrm_check_policy() function, and the if_id can be determined at that
point based on the skb. As such, xfrmi_decode_session() is only called
with the skb in __xfrm_check_policy().

In the outbound direction, the only place where if_id is needed is the
xfrm_lookup() call in xfrmi_xmit2(). With this change, the if_id is
directly passed into the xfrm_lookup_with_ifid() call. All existing
callers can still call xfrm_lookup(), which uses a default if_id of 0.

This change does not change any behavior of XFRMIs except for improving
overall system performance via flowi size reduction.

This change has been tested against the Android Kernel Networking Tests:

https://android.googlesource.com/kernel/tests/+/master/net/test

Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h  | 14 ++++++++++++++
 include/net/flow.h |  9 ---------
 include/net/xfrm.h |  2 +-
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index b3219cd8a5a1..7f735e76ca73 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
 	return dst_orig;
 }
 
+static inline struct dst_entry *
+xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
+		      const struct flowi *fl, const struct sock *sk,
+		      int flags, u32 if_id)
+{
+	return dst_orig;
+}
+
 static inline struct dst_entry *xfrm_lookup_route(struct net *net,
 						  struct dst_entry *dst_orig,
 						  const struct flowi *fl,
@@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct flowi *fl, const struct sock *sk,
 			      int flags);
 
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+					struct dst_entry *dst_orig,
+					const struct flowi *fl,
+					const struct sock *sk, int flags,
+					u32 if_id);
+
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    const struct flowi *fl, const struct sock *sk,
 				    int flags);
diff --git a/include/net/flow.h b/include/net/flow.h
index 187c9bef672f..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,10 +26,6 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
-struct flowi_xfrm {
-	__u32			if_id;
-};
-
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -43,7 +39,6 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
-	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -83,7 +78,6 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
-#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -115,7 +109,6 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
-	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -145,7 +138,6 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
-#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -193,7 +185,6 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
-#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1350e2cf0749..ca820945f30c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1557,7 +1557,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   const struct flowi *fl,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
-				   unsigned short family);
+				   unsigned short family, u32 if_id);
 struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
-- 
cgit 


From b8088dda98b9064a2b3007fe54b03ede70a15602 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 17 Jul 2018 07:17:53 +0200
Subject: netfilter: nf_tables: use dev->name directly

no need to store the name in separate area.

Furthermore, it uses kmalloc but not kfree and most accesses seem to treat
it as char[IFNAMSIZ] not char *.

Remove this and use dev->name instead.

In case event zeroed dev, just omit the name in the dump.

Fixes: d92191aa84e5f1 ("netfilter: nf_tables: cache device name in flowtable object")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4e82a4c49912..dc417ef0a0c5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1124,7 +1124,6 @@ struct nft_flowtable {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
-	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
 	/* runtime data below here */
 	struct nf_hook_ops		*ops ____cacheline_aligned;
 	struct nf_flowtable		data;
-- 
cgit 


From 27cde44a259c380a3c09066fc4b42de7dde9b1ad Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:35 -0700
Subject: tcp: do not cancel delay-AcK on DCTCP special ACK

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3482d13d655b..a08de496d1b2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -539,6 +539,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
-- 
cgit 


From a0496ef2c23b3b180902dd185d0d63ccbc624cf8 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:36 -0700
Subject: tcp: do not delay ACK in DCTCP upon CE status change

Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change
has to be sent immediately so the sender can respond quickly:

""" When receiving packets, the CE codepoint MUST be processed as follows:

   1.  If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to
       true and send an immediate ACK.

   2.  If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE
       to false and send an immediate ACK.
"""

Previously DCTCP implementation may continue to delay the ACK. This
patch fixes that to implement the RFC by forcing an immediate ACK.

Tested with this packetdrill script provided by Larry Brakmo

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
   +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.005 < [ce] . 2001:3001(1000) ack 2 win 257

+0.000 > [ect01] . 2:2(0) ack 2001
// Previously the ACK below would be delayed by 40ms
+0.000 > [ect01] E. 2:2(0) ack 3001

+0.500 < F. 9501:9501(0) ack 4 win 257

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a08de496d1b2..25116ec02087 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
-- 
cgit 


From fbdeaed408cf2728c62640c10848ddb1b67e63d3 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 20 Jul 2018 21:56:53 +0000
Subject: net: create reusable function for getting ownership info of sysfs
 inodes

Make net_ns_get_ownership() reusable by networking code outside of core.
This is useful, for example, to allow bridge related sysfs files to be
owned by container root.

Add a function comment since this is a potentially dangerous function to
use given the way that kobject_get_ownership() works by initializing uid
and gid before calling .get_ownership().

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a71264d75d7f..9b5fdc50519a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -10,6 +10,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
+#include <linux/uidgid.h>
 
 #include <net/flow.h>
 #include <net/netns/core.h>
@@ -170,6 +171,8 @@ extern struct net init_net;
 struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 			struct net *old_net);
 
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
 void net_ns_barrier(void);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
@@ -182,6 +185,13 @@ static inline struct net *copy_net_ns(unsigned long flags,
 	return old_net;
 }
 
+static inline void net_ns_get_ownership(const struct net *net,
+					kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+}
+
 static inline void net_ns_barrier(void) {}
 #endif /* CONFIG_NET_NS */
 
-- 
cgit 


From 24b711edfc34bc45777a3f068812b7d1ed004a5d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 19 Jul 2018 12:41:18 -0700
Subject: net/ipv6: Fix linklocal to global address with VRF

Example setup:
    host: ip -6 addr add dev eth1 2001:db8:104::4
           where eth1 is enslaved to a VRF

    switch: ip -6 ro add 2001:db8:104::4/128 dev br1
            where br1 only has an LLA

           ping6 2001:db8:104::4
           ssh   2001:db8:104::4

(NOTE: UDP works fine if the PKTINFO has the address set to the global
address and ifindex is set to the index of eth1 with a destination an
LLA).

For ICMP, icmp6_iif needs to be updated to check if skb->dev is an
L3 master. If it is then return the ifindex from rt6i_idev similar
to what is done for loopback.

For TCP, restore the original tcp_v6_iif definition which is needed in
most places and add a new tcp_v6_iif_l3_slave that considers the
l3_slave variability. This latter check is only needed for socket
lookups.

Fixes: 9ff74384600a ("net: vrf: Handle ipv6 multicast and link-local addresses")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 25116ec02087..cd3ecda9386a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -840,6 +840,11 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
  * as TCP moves IP6CB into a different location in skb->cb[]
  */
 static inline int tcp_v6_iif(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->header.h6.iif;
+}
+
+static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb)
 {
 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 
-- 
cgit 


From 042f8825569d628517784d558aefe23c212f0fb2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 20 Jul 2018 21:14:38 -0700
Subject: nfp: bring back support for offloading shared blocks

Now that we have offload replay infrastructure added by
commit 326367427cc0 ("net: sched: call reoffload op on block callback reg")
and flows are guaranteed to be removed correctly, we can revert
commit 951a8ee6def3 ("nfp: reject binding to shared blocks").

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e4252a176eec..4f405ca8346f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -114,11 +114,6 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
-static inline bool tcf_block_shared(struct tcf_block *block)
-{
-	return false;
-}
-
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
 	return NULL;
-- 
cgit 


From e873e4b9cc7e8ce79e5c5627b32b107035bb3f5d Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Sat, 21 Jul 2018 20:56:32 -0700
Subject: ipv6: use fib6_info_hold_safe() when necessary

In the code path where only rcu read lock is held, e.g. in the route
lookup code path, it is not safe to directly call fib6_info_hold()
because the fib6_info may already have been deleted but still exists
in the rcu grace period. Holding reference to it could cause double
free and crash the kernel.

This patch adds a new function fib6_info_hold_safe() and replace
fib6_info_hold() in all necessary places.

Syzbot reported 3 crash traces because of this. One of them is:
8021q: adding VLAN 0 to HW filter on device team0
IPv6: ADDRCONF(NETDEV_CHANGE): team0: link becomes ready
dst_release: dst:(____ptrval____) refcnt:-1
dst_release: dst:(____ptrval____) refcnt:-2
WARNING: CPU: 1 PID: 4845 at include/net/dst.h:239 dst_hold include/net/dst.h:239 [inline]
WARNING: CPU: 1 PID: 4845 at include/net/dst.h:239 ip6_setup_cork+0xd66/0x1830 net/ipv6/ip6_output.c:1204
dst_release: dst:(____ptrval____) refcnt:-1
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 4845 Comm: syz-executor493 Not tainted 4.18.0-rc3+ #10
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 panic+0x238/0x4e7 kernel/panic.c:184
dst_release: dst:(____ptrval____) refcnt:-2
dst_release: dst:(____ptrval____) refcnt:-3
 __warn.cold.8+0x163/0x1ba kernel/panic.c:536
dst_release: dst:(____ptrval____) refcnt:-4
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1fc/0x4d0 arch/x86/kernel/traps.c:296
dst_release: dst:(____ptrval____) refcnt:-5
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:316
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:dst_hold include/net/dst.h:239 [inline]
RIP: 0010:ip6_setup_cork+0xd66/0x1830 net/ipv6/ip6_output.c:1204
Code: c1 ed 03 89 9d 18 ff ff ff 48 b8 00 00 00 00 00 fc ff df 41 c6 44 05 00 f8 e9 2d 01 00 00 4c 8b a5 c8 fe ff ff e8 1a f6 e6 fa <0f> 0b e9 6a fc ff ff e8 0e f6 e6 fa 48 8b 85 d0 fe ff ff 48 8d 78
RSP: 0018:ffff8801a8fcf178 EFLAGS: 00010293
RAX: ffff8801a8eba5c0 RBX: 0000000000000000 RCX: ffffffff869511e6
RDX: 0000000000000000 RSI: ffffffff869515b6 RDI: 0000000000000005
RBP: ffff8801a8fcf2c8 R08: ffff8801a8eba5c0 R09: ffffed0035ac8338
R10: ffffed0035ac8338 R11: ffff8801ad6419c3 R12: ffff8801a8fcf720
R13: ffff8801a8fcf6a0 R14: ffff8801ad6419c0 R15: ffff8801ad641980
 ip6_make_skb+0x2c8/0x600 net/ipv6/ip6_output.c:1768
 udpv6_sendmsg+0x2c90/0x35f0 net/ipv6/udp.c:1376
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:641 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:651
 ___sys_sendmsg+0x51d/0x930 net/socket.c:2125
 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2220
 __do_sys_sendmmsg net/socket.c:2249 [inline]
 __se_sys_sendmmsg net/socket.c:2246 [inline]
 __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2246
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446ba9
Code: e8 cc bb 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fb39a469da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000006dcc54 RCX: 0000000000446ba9
RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003
RBP: 00000000006dcc50 R08: 00007fb39a46a700 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 45c828efc7a64843
R13: e6eeb815b9d8a477 R14: 5068caf6f713c6fc R15: 0000000000000001
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Reported-by: syzbot+902e2a1bcd4f7808cef5@syzkaller.appspotmail.com
Reported-by: syzbot+8ae62d67f647abeeceb9@syzkaller.appspotmail.com
Reported-by: syzbot+3f08feb14086930677d0@syzkaller.appspotmail.com
Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 71b9043aa0e7..3d4930528db0 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -281,6 +281,11 @@ static inline void fib6_info_hold(struct fib6_info *f6i)
 	atomic_inc(&f6i->fib6_ref);
 }
 
+static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
+{
+	return atomic_inc_not_zero(&f6i->fib6_ref);
+}
+
 static inline void fib6_info_release(struct fib6_info *f6i)
 {
 	if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
-- 
cgit 


From f71e0ca4db187af7c44987e9d21e9042c3046070 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:05 +0200
Subject: net: sched: Avoid implicit chain 0 creation

Currently, chain 0 is implicitly created during block creation. However
that does not align with chain object exposure, creation and destruction
api introduced later on. So make the chain 0 behave the same way as any
other chain and only create it when it is needed. Since chain 0 is
somehow special as the qdiscs need to hold pointer to the first chain
tp, this requires to move the chain head change callback infra to the
block structure.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7432100027b7..86f4651784e8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -300,7 +300,6 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
 	struct tcf_proto __rcu *filter_chain;
-	struct list_head filter_chain_list;
 	struct list_head list;
 	struct tcf_block *block;
 	u32 index; /* chain index */
@@ -318,6 +317,10 @@ struct tcf_block {
 	bool keep_dst;
 	unsigned int offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
+	struct {
+		struct tcf_chain *chain;
+		struct list_head filter_chain_list;
+	} chain0;
 };
 
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
-- 
cgit 


From 32a4f5ecd7381f30ae3bb36dea77a150ba68af2e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:06 +0200
Subject: net: sched: introduce chain object to uapi

Allow user to create, destroy, get and dump chain objects. Do that by
extending rtnl commands by the chain-specific ones. User will now be
able to explicitly create or destroy chains (so far this was done only
automatically according the filter/act needs and refcounting). Also, the
user will receive notification about any chain creation or destuction.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 86f4651784e8..81ec8276db9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -304,6 +304,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	bool explicitly_created;
 };
 
 struct tcf_block {
-- 
cgit 


From 9f407f1768d3e1a5ddd7bd49fa4d1f5a26e10ed2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:07 +0200
Subject: net: sched: introduce chain templates

Allow user to set a template for newly created chains. Template lock
down the chain for particular classifier type/options combinations.
The classifier needs to support templates, otherwise kernel would
reply with error.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 81ec8276db9c..085c509c8674 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -238,6 +238,8 @@ struct tcf_result {
 	};
 };
 
+struct tcf_chain;
+
 struct tcf_proto_ops {
 	struct list_head	head;
 	char			kind[IFNAMSIZ];
@@ -263,10 +265,18 @@ struct tcf_proto_ops {
 					     tc_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
+	void *			(*tmplt_create)(struct net *net,
+						struct tcf_chain *chain,
+						struct nlattr **tca,
+						struct netlink_ext_ack *extack);
+	void			(*tmplt_destroy)(void *tmplt_priv);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*);
+	int			(*tmplt_dump)(struct sk_buff *skb,
+					      struct net *net,
+					      void *tmplt_priv);
 
 	struct module		*owner;
 };
@@ -305,6 +315,8 @@ struct tcf_chain {
 	u32 index; /* chain index */
 	unsigned int refcnt;
 	bool explicitly_created;
+	const struct tcf_proto_ops *tmplt_ops;
+	void *tmplt_priv;
 };
 
 struct tcf_block {
-- 
cgit 


From 34738452739069947e528123810533f28dd8332b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:11 +0200
Subject: net: sched: cls_flower: propagate chain teplate creation and
 destruction to drivers

Introduce a couple of flower offload commands in order to propagate
template creation/destruction events down to device drivers.
Drivers may use this information to prepare HW in an optimal way
for future filter insertions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4f405ca8346f..a3101582f642 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -721,6 +721,8 @@ enum tc_fl_command {
 	TC_CLSFLOWER_REPLACE,
 	TC_CLSFLOWER_DESTROY,
 	TC_CLSFLOWER_STATS,
+	TC_CLSFLOWER_TMPLT_CREATE,
+	TC_CLSFLOWER_TMPLT_DESTROY,
 };
 
 struct tc_cls_flower_offload {
-- 
cgit 


From 1f3ed383fb9a073ae2e408cd7a0717b04c7c3a21 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 27 Jul 2018 09:45:05 +0200
Subject: net: sched: don't dump chains only held by actions

In case a chain is empty and not explicitly created by a user,
such chain should not exist. The only exception is if there is
an action "goto chain" pointing to it. In that case, don't show the
chain in the dump. Track the chain references held by actions and
use them to find out if a chain should or should not be shown
in chain dump.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     | 3 +++
 include/net/sch_generic.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3101582f642..6d02f31abba8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -39,7 +39,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create);
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
+				       u32 chain_index);
 void tcf_chain_put(struct tcf_chain *chain);
+void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 085c509c8674..c5432362dc26 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -314,6 +314,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	unsigned int action_refcnt;
 	bool explicitly_created;
 	const struct tcf_proto_ops *tmplt_ops;
 	void *tmplt_priv;
-- 
cgit 


From b67c540b8a987e365dc548e5b2ddf023946e3d63 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 27 Jul 2018 15:26:56 +0300
Subject: net: dcb: Add priority-to-DSCP map getters

On ingress, a network device such as a switch assigns to packets
priority based on various criteria. Common options include interpreting
PCP and DSCP fields according to user configuration. When a packet
egresses the switch, a reverse process may rewrite PCP and/or DSCP
values according to packet priority.

The following three functions support a) obtaining a DSCP-to-priority
map or vice versa, and b) finding default-priority entries in APP
database.

The DCB subsystem supports for APP entries a very generous M:N mapping
between priorities and protocol identifiers. Understandably,
several (say) DSCP values can map to the same priority. But this
asymmetry holds the other way around as well--one priority can map to
several DSCP values. For this reason, the following functions operate in
terms of bitmaps, with ones in positions that match some APP entry.

- dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice
  a map of DSCP-to-priority-mask, which gives for each DSCP value a
  bitmap of priorities related to that DSCP value by APP, along the
  lines of dcb_ieee_getapp_mask().

- dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given
  netdevice a map from priorities to a bitmap of DSCPs.

- dcb_ieee_getapp_default_prio_mask() which finds all default-priority
  rules for a given port in APP database, and returns a mask of
  priorities allowed by these default-priority rules.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 0e5e91be2d30..e22a8a3c089b 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -34,6 +34,19 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
+struct dcb_ieee_app_prio_map {
+	u64 map[IEEE_8021QAZ_MAX_TCS];
+};
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_prio_map *p_map);
+
+struct dcb_ieee_app_dscp_map {
+	u8 map[64];
+};
+void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_dscp_map *p_map);
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev);
+
 int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
 		      u32 seq, u32 pid);
 int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
-- 
cgit 


From 222440b4e832059c0ddf18d1e409f0552ab53a7d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 4 Jul 2018 12:48:04 +0200
Subject: netfilter: nf_tables: handle meta/lookup with direct call

Currently nft uses inlined variants for common operations
such as 'ip saddr 1.2.3.4' instead of an indirect call.

Also handle meta get operations and lookups without indirect call,
both are builtin.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a05134507e7b..8da837d2aaf9 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -71,4 +71,11 @@ extern struct nft_set_type nft_set_hash_fast_type;
 extern struct nft_set_type nft_set_rbtree_type;
 extern struct nft_set_type nft_set_bitmap_type;
 
+struct nft_expr;
+struct nft_regs;
+struct nft_pktinfo;
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_lookup_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
-- 
cgit 


From 6decb5b45e70d6ffff6488cc8e8bad6b9ac7f99b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:32 +0530
Subject: Bluetooth: Define PHY flags in hdev and set 1M as default

1M is mandatory to be supported by LE controllers and the same
would be set in power on. This patch defines hdev flags for
LE PHYs and set 1M to default.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 ++++
 include/net/bluetooth/hci_core.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 73e48be5bbb3..664fe1ebf2c7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,10 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_LE_SET_PHY_1M		0x01
+#define HCI_LE_SET_PHY_2M		0x02
+#define HCI_LE_SET_PHY_CODED		0x04
+
 #define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
 struct hci_cp_le_set_ext_scan_params {
 	__u8    own_addr_type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a74453571264..71f79df9ee05 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -315,6 +315,9 @@ struct hci_dev {
 	unsigned long	sco_last_tx;
 	unsigned long	le_last_tx;
 
+	__u8		le_tx_def_phys;
+	__u8		le_rx_def_phys;
+
 	struct workqueue_struct	*workqueue;
 	struct workqueue_struct	*req_workqueue;
 
-- 
cgit 


From 5075b972f20ddad5bb19542ea4f5794d06673375 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:33 +0530
Subject: Bluetooth: Add defines for BREDR pkt_type and LE PHYs

This also add macros for checking LMP support for different
pkt_types

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 14 ++++++++++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 2 files changed, 18 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 664fe1ebf2c7..89bf800f6eb1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -291,6 +291,14 @@ enum {
 #define HCI_DH3		0x0800
 #define HCI_DH5		0x8000
 
+/* HCI packet types inverted masks */
+#define HCI_2DH1	0x0002
+#define HCI_3DH1	0x0004
+#define HCI_2DH3	0x0100
+#define HCI_3DH3	0x0200
+#define HCI_2DH5	0x1000
+#define HCI_3DH5	0x2000
+
 #define HCI_HV1		0x0020
 #define HCI_HV2		0x0040
 #define HCI_HV3		0x0080
@@ -354,6 +362,8 @@ enum {
 #define LMP_PCONTROL	0x04
 #define LMP_TRANSPARENT	0x08
 
+#define LMP_EDR_2M		0x02
+#define LMP_EDR_3M		0x04
 #define LMP_RSSI_INQ	0x40
 #define LMP_ESCO	0x80
 
@@ -361,7 +371,9 @@ enum {
 #define LMP_EV5		0x02
 #define LMP_NO_BREDR	0x20
 #define LMP_LE		0x40
+#define LMP_EDR_3SLOT	0x80
 
+#define LMP_EDR_5SLOT	0x01
 #define LMP_SNIFF_SUBR	0x02
 #define LMP_PAUSE_ENC	0x04
 #define LMP_EDR_ESCO_2M	0x20
@@ -399,6 +411,8 @@ enum {
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
 #define HCI_LE_EXT_SCAN_POLICY		0x80
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_CHAN_SEL_ALG2		0x40
 
 /* Connection modes */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 71f79df9ee05..a64d13f91d09 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1141,6 +1141,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR)
 #define lmp_ext_feat_capable(dev)  ((dev)->features[0][7] & LMP_EXTFEATURES)
 #define lmp_transp_capable(dev)    ((dev)->features[0][2] & LMP_TRANSPARENT)
+#define lmp_edr_2m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_2M)
+#define lmp_edr_3m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_3M)
+#define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT)
+#define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT)
 
 /* ----- Extended LMP capabilities ----- */
 #define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER)
-- 
cgit 


From 6244691fec4dd0adebca255e60e0ed7ac8155b2e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:34 +0530
Subject: Bluetooth: Implement Get PHY Configuration mgmt command

This commands basically retrieve the supported packet types of
BREDR and supported PHYs of the controller.

BR_1M_1SLOT, LE_1M_TX and LE_1M_RX would be supported by default.
Other PHYs are supported based on the local features.

Also this sets PHY_CONFIGURATION bit in supported settings.

@ MGMT Command: Get PHY Configuration (0x0044) plen 0
@ MGMT Event: Command Complete (0x0001) plen 15
      Get PHY Configuration (0x0044) plen 12
        Status: Success (0x00)
        Supported PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Configurable PHYs: 0x79fe
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Selected PHYs: 0x07ff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e7303eee65cd..1c93d6e83a6c 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -101,6 +101,7 @@ struct mgmt_rp_read_index_list {
 #define MGMT_SETTING_PRIVACY		0x00002000
 #define MGMT_SETTING_CONFIGURATION	0x00004000
 #define MGMT_SETTING_STATIC_ADDRESS	0x00008000
+#define MGMT_SETTING_PHY_CONFIGURATION  0x00010000
 
 #define MGMT_OP_READ_INFO		0x0004
 #define MGMT_READ_INFO_SIZE		0
@@ -604,6 +605,30 @@ struct mgmt_cp_set_appearance {
 } __packed;
 #define MGMT_SET_APPEARANCE_SIZE	2
 
+#define MGMT_OP_GET_PHY_CONFIGURATION	0x0044
+struct mgmt_rp_get_phy_confguration {
+	__le32	supported_phys;
+	__le32	configurable_phys;
+	__le32	selected_phys;
+} __packed;
+#define MGMT_GET_PHY_CONFIGURATION_SIZE	0
+
+#define MGMT_PHY_BR_1M_1SLOT	0x00000001
+#define MGMT_PHY_BR_1M_3SLOT	0x00000002
+#define MGMT_PHY_BR_1M_5SLOT	0x00000004
+#define MGMT_PHY_EDR_2M_1SLOT	0x00000008
+#define MGMT_PHY_EDR_2M_3SLOT	0x00000010
+#define MGMT_PHY_EDR_2M_5SLOT	0x00000020
+#define MGMT_PHY_EDR_3M_1SLOT	0x00000040
+#define MGMT_PHY_EDR_3M_3SLOT	0x00000080
+#define MGMT_PHY_EDR_3M_5SLOT	0x00000100
+#define MGMT_PHY_LE_1M_TX		0x00000200
+#define MGMT_PHY_LE_1M_RX		0x00000400
+#define MGMT_PHY_LE_2M_TX		0x00000800
+#define MGMT_PHY_LE_2M_RX		0x00001000
+#define MGMT_PHY_LE_CODED_TX	0x00002000
+#define MGMT_PHY_LE_CODED_RX	0x00004000
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
-- 
cgit 


From 0314f2867fa0c46d0fc1c23c80e7fab9435079df Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:35 +0530
Subject: Bluetooth: Implement Set PHY Confguration command

This enables user to set phys which will be used in all subsequent
connections. Also host will use the same in LE scanning as well.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
        All PHYs preference: 0x00
        TX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
        RX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Default PHY (0x08|0x0031) ncmd 1
        Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
      Set PHY Configuration (0x0045) plen 0
        Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 1c93d6e83a6c..0916e203e5d9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -629,6 +629,25 @@ struct mgmt_rp_get_phy_confguration {
 #define MGMT_PHY_LE_CODED_TX	0x00002000
 #define MGMT_PHY_LE_CODED_RX	0x00004000
 
+#define MGMT_PHY_BREDR_MASK (MGMT_PHY_BR_1M_1SLOT | MGMT_PHY_BR_1M_3SLOT | \
+			     MGMT_PHY_BR_1M_5SLOT | MGMT_PHY_EDR_2M_1SLOT | \
+			     MGMT_PHY_EDR_2M_3SLOT | MGMT_PHY_EDR_2M_5SLOT | \
+			     MGMT_PHY_EDR_3M_1SLOT | MGMT_PHY_EDR_3M_3SLOT | \
+			     MGMT_PHY_EDR_3M_5SLOT)
+#define MGMT_PHY_LE_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_1M_RX | \
+			  MGMT_PHY_LE_2M_TX | MGMT_PHY_LE_2M_RX | \
+			  MGMT_PHY_LE_CODED_TX | MGMT_PHY_LE_CODED_RX)
+#define MGMT_PHY_LE_TX_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_2M_TX | \
+			     MGMT_PHY_LE_CODED_TX)
+#define MGMT_PHY_LE_RX_MASK (MGMT_PHY_LE_1M_RX | MGMT_PHY_LE_2M_RX | \
+			     MGMT_PHY_LE_CODED_RX)
+
+#define MGMT_OP_SET_PHY_CONFIGURATION	0x0045
+struct mgmt_cp_set_phy_confguration {
+	__le32	selected_phys;
+} __packed;
+#define MGMT_SET_PHY_CONFIGURATION_SIZE	4
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
-- 
cgit 


From b7c23df85b6a1c3bcfb591cfa938d341fc3a556e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:36 +0530
Subject: Bluetooth: Implement PHY changed event

This defines and implement phy changed event and send it to user
whenever selected PHYs changes using SET_PHY_CONFIGURATION.

This will be also trigerred when BREDR pkt_type is changed using
the legacy ioctl HCISETPTYPE.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
		All PHYs preference: 0x00
		TX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
		RX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
> HCI Event: Command Complete (0x0e) plen 4
	  LE Set Default PHY (0x08|0x0031) ncmd 1
		Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
	  Set PHY Configuration (0x0045) plen 0
		Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 include/net/bluetooth/mgmt.h     | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a64d13f91d09..ab5d494a545a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1544,6 +1544,7 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
 			    u8 instance);
 void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
 			      u8 instance);
+int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
 
 u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
 		      u16 to_multiplier);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 0916e203e5d9..7f372e9067c9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -868,3 +868,8 @@ struct mgmt_ev_ext_info_changed {
 	__le16	eir_len;
 	__u8	eir[0];
 } __packed;
+
+#define MGMT_EV_PHY_CONFIGURATION_CHANGED	0x0026
+struct mgmt_ev_phy_configuration_changed {
+	__le32	selected_phys;
+} __packed;
-- 
cgit 


From 45bdd86eafc7d29e0b4b6681bec9c6ab8eddc6bf Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:37 +0530
Subject: Bluetooth: Set Scan PHYs based on selected PHYs by user

Use the PHYs selected in Set Phy Configuration management command
while scanning.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 13
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x05
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
        Entry 1: LE Coded
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 +++-
 include/net/bluetooth/hci_core.h | 9 +++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 89bf800f6eb1..04211457367a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1540,7 +1540,9 @@ struct hci_cp_le_set_ext_scan_params {
 	__u8    data[0];
 } __packed;
 
-#define LE_SCAN_PHY_1M 0x01
+#define LE_SCAN_PHY_1M		0x01
+#define LE_SCAN_PHY_2M		0x02
+#define LE_SCAN_PHY_CODED	0x04
 
 struct hci_cp_le_scan_phy_params {
 	__u8    type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab5d494a545a..113c9bb609c7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1165,6 +1165,15 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
+
+#define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
+
+#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
+			 ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
+
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
-- 
cgit 


From b2cc9761f144e8ef714be8c590603073b80ddc13 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:38 +0530
Subject: Bluetooth: Handle extended ADV PDU types

This patch defines the extended ADV types and handle it in ADV report.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 04211457367a..83a1593a128e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1976,6 +1976,14 @@ struct hci_ev_le_conn_complete {
 #define LE_LEGACY_SCAN_RSP_ADV		0x001b
 #define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
+/* Extended Advertising event types */
+#define LE_EXT_ADV_NON_CONN_IND		0x0000
+#define LE_EXT_ADV_CONN_IND		0x0001
+#define LE_EXT_ADV_SCAN_IND		0x0002
+#define LE_EXT_ADV_DIRECT_IND		0x0004
+#define LE_EXT_ADV_SCAN_RSP		0x0008
+#define LE_EXT_ADV_LEGACY_PDU		0x0010
+
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
 
-- 
cgit 


From 6b49bcb4bce2ed0f0aefe8e304a8b9cbaeeaa3f0 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:40 +0530
Subject: Bluetooth: Read no of adv sets during init

This patch reads the number of advertising sets in the controller
during init and save it in hdev.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 7 +++++++
 include/net/bluetooth/hci_core.h | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 83a1593a128e..3f93ae9765a4 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,7 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
 #define HCI_LE_PHY_CODED		0x08
@@ -1579,6 +1580,12 @@ struct hci_cp_le_ext_conn_param {
 	__le16 max_ce_len;
 } __packed;
 
+#define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS	0x203b
+struct hci_rp_le_read_num_supported_adv_sets {
+	__u8  status;
+	__u8  num_of_sets;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 113c9bb609c7..2aad4a863176 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -222,6 +222,7 @@ struct hci_dev {
 	__u8		le_features[8];
 	__u8		le_white_list_size;
 	__u8		le_resolv_list_size;
+	__u8		le_num_of_adv_sets;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -1180,6 +1181,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext create connection if command is supported */
 #define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
+/* Extended advertising support */
+#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
-- 
cgit 


From de181e887ac27dadda127c7d4c3e89c6da8fb6d2 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:41 +0530
Subject: Bluetooth: Impmlement extended adv enable

This patch basically replaces legacy adv with extended adv
based on the controller support. Currently there is no
design change. ie only one adv set will be enabled at a time.

This also adds tx_power in instance and store whatever returns
from Set_ext_parameter, use the same in adv data as well.
For instance 0 tx_power is stored in hdev only.

< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0010
          Use legacy advertising PDUs: ADV_NONCONN_IND
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE 1M (0x01)
        Secondary max skip: 0x00
        Secondary PHY: LE 1M (0x01)
        SID: 0x00
        Scan request notifications: Disabled (0x00)
> HCI Event: Command Complete (0x0e) plen 5
      LE Set Extended Advertising Parameters (0x08|0x0036) ncmd 1
        Status: Success (0x00)
        TX power (selected): 7 dbm (0x07)
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 6
        Extended advertising: Enabled (0x01)
        Number of sets: 1 (0x01)
        Entry 0
          Handle: 0x00
          Duration: 0 ms (0x00)
          Max ext adv events: 0
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 39 +++++++++++++++++++++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 2 files changed, 40 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3f93ae9765a4..b447b127879e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1586,6 +1586,45 @@ struct hci_rp_le_read_num_supported_adv_sets {
 	__u8  num_of_sets;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_PARAMS		0x2036
+struct hci_cp_le_set_ext_adv_params {
+	__u8      handle;
+	__le16    evt_properties;
+	__u8      min_interval[3];
+	__u8      max_interval[3];
+	__u8      channel_map;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      filter_policy;
+	__u8      tx_power;
+	__u8      primary_phy;
+	__u8      secondary_max_skip;
+	__u8      secondary_phy;
+	__u8      sid;
+	__u8      notif_enable;
+} __packed;
+
+#define HCI_ADV_PHY_1M		0X01
+
+struct hci_rp_le_set_ext_adv_params {
+	__u8  status;
+	__u8  tx_power;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_ADV_ENABLE		0x2039
+struct hci_cp_le_set_ext_adv_enable {
+	__u8  enable;
+	__u8  num_of_sets;
+	__u8  data[0];
+} __packed;
+
+struct hci_cp_ext_adv_set {
+	__u8  handle;
+	__le16 duration;
+	__u8  max_events;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2aad4a863176..ad3518303a0c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -171,6 +171,7 @@ struct adv_info {
 	__u8	adv_data[HCI_MAX_AD_LENGTH];
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
+	__s8	tx_power;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
-- 
cgit 


From a0fb3726ba55138ef6fdd5dc67da6d9a70360696 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:42 +0530
Subject: Bluetooth: Use Set ext adv/scan rsp data if controller supports

This patch implements Set Ext Adv data and Set Ext Scan rsp data
if controller support extended advertising.

Currently the operation is set as Complete data and fragment
preference is set as no fragment

< HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 35
        Handle: 0x00
        Operation: Complete extended advertising data (0x03)
        Fragment preference: Minimize fragmentation (0x01)
        Data length: 0x15
        16-bit Service UUIDs (complete): 2 entries
          Heart Rate (0x180d)
          Battery Service (0x180f)
        Name (complete): Test LE
        Company: Google (224)
          Data: 0102
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Data (0x08|0x0037) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index b447b127879e..aace97099ead 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1625,6 +1625,28 @@ struct hci_cp_ext_adv_set {
 	__u8  max_events;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_DATA		0x2037
+struct hci_cp_le_set_ext_adv_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA		0x2038
+struct hci_cp_le_set_ext_scan_rsp_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define LE_SET_ADV_DATA_OP_COMPLETE	0x03
+
+#define LE_SET_ADV_DATA_NO_FRAG		0x01
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
-- 
cgit 


From 45b7749f16aacd9ffab8e958caa77e2aa2358c0b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:43 +0530
Subject: Bluetooth: Implement disable and removal of adv instance

If ext adv is enabled then use ext adv to disable as well.
Also remove the adv set during LE disable.

< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index aace97099ead..faa2922a69fd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1647,6 +1647,8 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define LE_SET_ADV_DATA_NO_FRAG		0x01
 
+#define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
-- 
cgit 


From a73c046a2869048430c332a871a5b169f192c6c3 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:45 +0530
Subject: Bluetooth: Implement Set ADV set random address

This basically sets the random address for the adv instance
Random address can be set only if the instance is created which
is done in Set ext adv param.

Random address and rpa expire timer and flags have been added
to adv instance which will be used when the respective
instance is scheduled.

This introduces a hci_get_random_address() which returns the
own address type and random address (rpa or nrpa) based
on the instance flags and hdev flags. New function is required
since own address type should be known before setting adv params
but address can be set only after setting params.

< HCI Command: LE Set Advertising Set Random Address (0x08|0x0035) plen 7
        Advertising handle: 0x00
        Advertising random address: 3C:8E:56:9B:77:84 (OUI 3C-8E-56)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Advertising Set Random Address (0x08|0x0035) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 6 ++++++
 include/net/bluetooth/hci_core.h | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index faa2922a69fd..8d348d0d3eea 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1649,6 +1649,12 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
 
+#define HCI_OP_LE_SET_ADV_SET_RAND_ADDR	0x2035
+struct hci_cp_le_set_adv_set_rand_addr {
+	__u8  handle;
+	bdaddr_t  bdaddr;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ad3518303a0c..0db1b9b428b7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -172,6 +172,9 @@ struct adv_info {
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
 	__s8	tx_power;
+	bdaddr_t	random_addr;
+	bool 		rpa_expired;
+	struct delayed_work	rpa_expired_cb;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
@@ -1113,6 +1116,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 			 u16 scan_rsp_len, u8 *scan_rsp_data,
 			 u16 timeout, u16 duration);
 int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance);
+void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired);
 
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
-- 
cgit 


From acf0aeae431a0f1723385cd1cb50177e4cc10edd Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:46 +0530
Subject: Bluetooth: Handle ADv set terminated event

This event comes after connection complete event for incoming
connections. Since we now have different random address for
each instance, conn resp address is assigned from this event.

As of now only connection part is handled as we are not
enabling duration or max num of events while starting ext adv.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8d348d0d3eea..57e3e3675d66 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2155,6 +2155,14 @@ struct hci_ev_le_enh_conn_complete {
 	__u8      clk_accurancy;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_SET_TERM	0x12
+struct hci_evt_le_ext_adv_set_term {
+	__u8	status;
+	__u8	handle;
+	__le16	conn_handle;
+	__u8	num_evts;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit 


From 85a721a8b0b6880d8cf6b9def70404ade8563225 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:47 +0530
Subject: Bluetooth: Implement secondary advertising on different PHYs

This patch adds support for advertising in primary and secondary
channel on different PHYs. User can add the phy preference in
the flag based on which phy type will be added in extended
advertising parameter would be set.

@ MGMT Command: Add Advertising (0x003e) plen 11
        Instance: 1
        Flags: 0x00000200
          Advertise in CODED on Secondary channel
        Duration: 0
        Timeout: 0
        Advertising data length: 0
        Scan response length: 0
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)
< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0000
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE Coded (0x03)
        Secondary max skip: 0x00
        Secondary PHY: LE Coded (0x03)
        SID: 0x00
        Scan request notifications: Disabled (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h  | 4 ++++
 include/net/bluetooth/mgmt.h | 6 ++++++
 2 files changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 57e3e3675d66..8ff36463719f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,8 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
@@ -1606,6 +1608,8 @@ struct hci_cp_le_set_ext_adv_params {
 } __packed;
 
 #define HCI_ADV_PHY_1M		0X01
+#define HCI_ADV_PHY_2M		0x02
+#define HCI_ADV_PHY_CODED	0x03
 
 struct hci_rp_le_set_ext_adv_params {
 	__u8  status;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 7f372e9067c9..9cee7ddc6741 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -562,6 +562,12 @@ struct mgmt_rp_add_advertising {
 #define MGMT_ADV_FLAG_TX_POWER		BIT(4)
 #define MGMT_ADV_FLAG_APPEARANCE	BIT(5)
 #define MGMT_ADV_FLAG_LOCAL_NAME	BIT(6)
+#define MGMT_ADV_FLAG_SEC_1M 		BIT(7)
+#define MGMT_ADV_FLAG_SEC_2M 		BIT(8)
+#define MGMT_ADV_FLAG_SEC_CODED 	BIT(9)
+
+#define MGMT_ADV_FLAG_SEC_MASK	(MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
+				 MGMT_ADV_FLAG_SEC_CODED)
 
 #define MGMT_OP_REMOVE_ADVERTISING	0x003F
 struct mgmt_cp_remove_advertising {
-- 
cgit 


From 740011cfe94859df8d05f5400d589a8693b095e7 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 20 Jul 2018 13:12:28 +0800
Subject: Bluetooth: Add new quirk for non-persistent setup settings

Add a new quirk HCI_QUIRK_NON_PERSISTENT_SETUP allowing that a quirk that
runs setup() after every open() and not just after the first open().

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8ff36463719f..7f008097552e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -183,6 +183,15 @@ enum {
 	 * during the hdev->setup vendor callback.
 	 */
 	HCI_QUIRK_NON_PERSISTENT_DIAG,
+
+	/* When this quirk is set, setup() would be run after every
+	 * open() and not just after the first open().
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 *
+	 */
+	HCI_QUIRK_NON_PERSISTENT_SETUP,
 };
 
 /* HCI device flags */
-- 
cgit 


From dd979b4df817e9976f18fb6f9d134d6bc4a3c317 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:10 +0200
Subject: net: simplify sock_poll_wait

The wait_address argument is always directly derived from the filp
argument, so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 83b747538bd0..0518f61926ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2057,16 +2057,17 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 /**
  * sock_poll_wait - place memory barrier behind the poll_wait call.
  * @filp:           file
- * @wait_address:   socket wait queue
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
  */
-static inline void sock_poll_wait(struct file *filp,
-		wait_queue_head_t *wait_address, poll_table *p)
+static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
-	if (!poll_does_not_wait(p) && wait_address) {
-		poll_wait(filp, wait_address, p);
+	struct socket *sock = filp->private_data;
+	wait_queue_head_t *wq = sk_sleep(sock->sk);
+
+	if (!poll_does_not_wait(p) && wq) {
+		poll_wait(filp, wq, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-- 
cgit 


From d8bbd13beeaacd6494954bf5b945b54ccb2af309 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:11 +0200
Subject: net: don not detour through struct sock to find the poll waitqueue

For any open socket file descriptor sock->sk->sk_wq->wait will always
point to sock->wq->wait.  That means we can do the shorter dereference
and removal a NULL check and don't have to not worry about any RCU
protection.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 0518f61926ec..2afea5d1bdfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2064,10 +2064,9 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
 	struct socket *sock = filp->private_data;
-	wait_queue_head_t *wq = sk_sleep(sock->sk);
 
-	if (!poll_does_not_wait(p) && wq) {
-		poll_wait(filp, wq, p);
+	if (!poll_does_not_wait(p)) {
+		poll_wait(filp, &sock->wq->wait, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-- 
cgit 


From f641f13b992979b97e595b761a9ba1a64fed7c4e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:12 +0200
Subject: net: remove sock_poll_busy_loop

There is no point in hiding this logic in a helper.  Also remove the
useless events != 0 check and only busy loop once we know we actually
have a poll method.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/net')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 9e36fda652b7..85777e68f738 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,15 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
-{
-	if (sk_can_busy_loop(sock->sk) &&
-	    events && (events & POLL_BUSY_LOOP)) {
-		/* once, only if requested by syscall */
-		sk_busy_loop(sock->sk, 1);
-	}
-}
-
 /* if this socket can poll_ll, tell the system call */
 static inline __poll_t sock_poll_busy_flag(struct socket *sock)
 {
-- 
cgit 


From a331de3bf0e66ab2437fc8c5b99bd3c0d9da3088 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:13 +0200
Subject: net: remove sock_poll_busy_flag

Fold it into the only caller to make the code simpler and easier to read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 85777e68f738..ba61cdd09eaa 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,12 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-/* if this socket can poll_ll, tell the system call */
-static inline __poll_t sock_poll_busy_flag(struct socket *sock)
-{
-	return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
-}
-
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
-- 
cgit 


From 7fd4b288ea6a3e45ad8afbcd5ec39554d57f1ae0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:43 +0200
Subject: tc/act: remove unneeded RCU lock in action callback

Each lockless action currently does its own RCU locking in ->act().
This allows using plain RCU accessor, even if the context
is really RCU BH.

This change drops the per action RCU lock, replace the accessors
with the _bh variant, cleans up a bit the surrounding code and
documents the RCU status in the relevant header.
No functional nor performance change is intended.

The goal of this patch is clarifying that the RCU critical section
used by the tc actions extends up to the classifier's caller.

v1 -> v2:
 - preserve rcu lock in act_bpf: it's needed by eBPF helpers,
   as pointed out by Daniel

v3 -> v4:
 - fixed some typos in the commit message (JiriP)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h     | 2 +-
 include/net/sch_generic.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 683ce41053d9..8c9bc02d05e1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -85,7 +85,7 @@ struct tc_action_ops {
 	size_t	size;
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, const struct tc_action *,
-		       struct tcf_result *);
+		       struct tcf_result *); /* called under RCU BH lock*/
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
 	int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5432362dc26..bcae181c1857 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -285,6 +285,8 @@ struct tcf_proto {
 	/* Fast access part */
 	struct tcf_proto __rcu	*next;
 	void __rcu		*root;
+
+	/* called under RCU BH lock*/
 	int			(*classify)(struct sk_buff *,
 					    const struct tcf_proto *,
 					    struct tcf_result *);
-- 
cgit 


From cd11b164073b719203318227918f9510809d5e10 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:44 +0200
Subject: net/tc: introduce TC_ACT_REINSERT.

This is similar TC_ACT_REDIRECT, but with a slightly different
semantic:
- on ingress the mirred skbs are passed to the target device
network stack without any additional check not scrubbing.
- the rcu-protected stats provided via the tcf_result struct
  are updated on error conditions.

This new tcfa_action value is not exposed to the user-space
and can be used only internally by clsact.

v1 -> v2: do not touch TC_ACT_REDIRECT code path, introduce
 a new action type instead
v2 -> v3:
 - rename the new action value TC_ACT_REINJECT, update the
   helper accordingly
 - take care of uncloned reinjected packets in XDP generic
   hook
v3 -> v4:
 - renamed again the new action value (JiriP)
v4 -> v5:
 - fix build error with !NET_CLS_ACT (kbuild bot)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  3 +++
 include/net/sch_generic.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6d02f31abba8..22bfc3a13c25 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -7,6 +7,9 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
+/* TC action not accessible from user space */
+#define TC_ACT_REINSERT		(TC_ACT_VALUE_MAX + 1)
+
 /* Basic packet classifier frontend definitions. */
 
 struct tcf_walker {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bcae181c1857..a6d00093f35e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -235,6 +235,12 @@ struct tcf_result {
 			u32		classid;
 		};
 		const struct tcf_proto *goto_tp;
+
+		/* used by the TC_ACT_REINSERT action */
+		struct {
+			bool		ingress;
+			struct gnet_stats_queue *qstats;
+		};
 	};
 };
 
@@ -569,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
 #endif
 }
 
+static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return skb->tc_redirected;
+#else
+	return false;
+#endif
+}
+
 static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1108,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 			  struct mini_Qdisc __rcu **p_miniq);
 
+static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+{
+	struct gnet_stats_queue *stats = res->qstats;
+	int ret;
+
+	if (res->ingress)
+		ret = netif_receive_skb(skb);
+	else
+		ret = dev_queue_xmit(skb);
+	if (ret && stats)
+		qstats_overlimit_inc(res->qstats);
+}
+
 #endif
-- 
cgit 


From e6476c21447c4b17c47e476aade6facf050f31e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:45:07 +0200
Subject: net: remove bogus RCU annotations on socket.wq

We never use RCU protection for it, just a lot of cargo-cult
rcu_deference_protects calls.

Note that we do keep the kfree_rcu call for it, as the references through
struct sock are RCU protected and thus might require a grace period before
freeing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2afea5d1bdfe..433f45fc2d68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1788,7 +1788,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	WARN_ON(parent->sk);
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_wq = parent->wq;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
-- 
cgit 


From 83ba4645152d1177c161750e1064e3a8e7cee19b Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Tue, 31 Jul 2018 21:18:11 +0200
Subject: net: add helpers checking if socket can be bound to nonlocal address

The construction "net->ipv4.sysctl_ip_nonlocal_bind || inet->freebind
|| inet->transparent" is present three times and its IPv6 counterpart
is also present three times. We introduce two small helpers to
characterize these tests uniformly.

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 8 ++++++++
 include/net/ipv6.h      | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 314be484c696..e03b93360f33 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -359,4 +359,12 @@ static inline bool inet_get_convert_csum(struct sock *sk)
 	return !!inet_sk(sk)->convert_csum;
 }
 
+
+static inline bool inet_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv4.sysctl_ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a44509f4e985..82deb684ba73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,6 +766,13 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
-- 
cgit 


From 432e05d328921c68c35bfdeff7d7b7400b8e3d1a Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:03 +0200
Subject: net: ipv4: Control SKB reprioritization after forwarding

After IPv4 packets are forwarded, the priority of the corresponding SKB
is updated according to the TOS field of IPv4 header. This overrides any
prioritization done earlier by e.g. an skbedit action or ingress-qos-map
defined at a vlan device.

Such overriding may not always be desirable. Even if the packet ends up
being routed, which implies this is an L3 network node, an administrator
may wish to preserve whatever prioritization was done earlier on in the
pipeline.

Therefore introduce a sysctl that controls this behavior. Keep the
default value at 1 to maintain backward-compatible behavior.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 661348f23ea5..e47503b4e4d1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -98,6 +98,7 @@ struct netns_ipv4 {
 	int sysctl_ip_default_ttl;
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
+	int sysctl_ip_fwd_update_priority;
 	int sysctl_ip_nonlocal_bind;
 	/* Shall we try to damage output packets if routing dev changes? */
 	int sysctl_ip_dynaddr;
-- 
cgit 


From d18c5d1995aa322b722fa731397e28ebcd00b3c6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:42 +0200
Subject: net: ipv4: Notify about changes to ip_forward_update_priority

Drivers may make offloading decision based on whether
ip_forward_update_priority is enabled or not. Therefore distribute
netevent notifications to give them a chance to react to a change.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netevent.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/netevent.h b/include/net/netevent.h
index d9918261701c..4107016c3bb4 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -28,6 +28,7 @@ enum netevent_notif_type {
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 	NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
-- 
cgit 


From 290b1c8b1a902c0902df9ec05577ab209296f345 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 1 Aug 2018 12:36:57 +0200
Subject: net: sched: make tcf_chain_{get,put}() static

These are no longer used outside of cls_api.c so make them static.
Move tcf_chain_flush() to avoid fwd declaration of tcf_chain_put().

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

v1->v2:
- new patch

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 22bfc3a13c25..ef727f71336e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -40,11 +40,8 @@ struct tcf_block_cb;
 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 
 #ifdef CONFIG_NET_CLS
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-				bool create);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 				       u32 chain_index);
-void tcf_chain_put(struct tcf_chain *chain);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
-- 
cgit 


From db57dc7c7a5c42bb653425a01b6d73c49514b5db Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Wed, 1 Aug 2018 22:05:10 +0200
Subject: net: don't declare IPv6 non-local bind helper if CONFIG_IPV6
 undefined

Fixes: 83ba4645152d ("net: add helpers checking if socket can be bound to nonlocal address")
Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82deb684ba73..ff33f498c137 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,13 +766,6 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
-static inline bool ipv6_can_nonlocal_bind(struct net *net,
-					  struct inet_sock *inet)
-{
-	return net->ipv6.sysctl.ip_nonlocal_bind ||
-		inet->freebind || inet->transparent;
-}
-
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
@@ -789,6 +782,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* Sysctl settings for net ipv6.auto_flowlabels */
 #define IP6_AUTO_FLOW_LABEL_OFF		0
 #define IP6_AUTO_FLOW_LABEL_OPTOUT	1
-- 
cgit 


From 285189c78eeb6f684a024b86fb5997d10c6aa564 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 25 Jul 2018 15:52:13 +0800
Subject: netfilter: use kvmalloc_array to allocate memory for hashtable

nf_ct_alloc_hashtable is used to allocate memory for conntrack,
NAT bysrc and expectation hashtable. Assuming 64k bucket size,
which means 7th order page allocation, __get_free_pages, called
by nf_ct_alloc_hashtable, will trigger the direct memory reclaim
and stall for a long time, when system has lots of memory stress

so replace combination of __get_free_pages and vzalloc with
kvmalloc_array, which provides a overflow check and a fallback
if no high order memory is available, and do not retry to reclaim
memory, reduce stall

and remove nf_ct_free_hashtable, since it is just a kvfree

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Wang Li <wangli39@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a2b0ed025908..7e012312cd61 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -176,8 +176,6 @@ void nf_ct_netns_put(struct net *net, u8 nfproto);
  */
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
 
-void nf_ct_free_hashtable(void *hash, unsigned int size);
-
 int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
 
-- 
cgit 


From eb9950eb31f56e57582a61c92073336d04a26542 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 3 Aug 2018 17:06:56 +0100
Subject: rxrpc: Push iov_iter up from rxrpc_kernel_recv_data() to caller

Push iov_iter up from rxrpc_kernel_recv_data() to its caller to allow
non-contiguous iovs to be passed down, thereby permitting file reading to
be simplified in the AFS filesystem in a future patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_rxrpc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 8ae8ee004258..f53edb3754bc 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,7 +61,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 			   struct msghdr *, size_t,
 			   rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
-			   void *, size_t, size_t *, bool, u32 *, u16 *);
+			   struct iov_iter *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
 			     u32, int, const char *);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
-- 
cgit 


From 31ba191bf5ab2975c1955f1adf771a5a0b57afaa Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 3 Aug 2018 14:53:15 +0800
Subject: include/net/bond_3ad: Simplify the code by using the ARRAY_SIZE

We prefer to ARRAY_SIZE rather than the open code to calculate size.

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_3ad.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index f358ad5e4214..fc3111515f5c 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -283,7 +283,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
 		"none",
 		"unknown"
 	};
-	int max_size = sizeof(churn_description) / sizeof(churn_description[0]);
+	int max_size = ARRAY_SIZE(churn_description);
 
 	if (state >= max_size)
 		state = max_size - 1;
-- 
cgit 


From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:39 +0000
Subject: ip: use rb trees for IP frag queue.

Similar to TCP OOO RX queue, it makes sense to use rb trees to store
IP fragments, so that OOO fragments are inserted faster.

Tested:

- a follow-up patch contains a rather comprehensive ip defrag
  self-test (functional)
- ran neper `udp_stream -c -H <host> -F 100 -l 300 -T 20`:
    netstat --statistics
    Ip:
        282078937 total packets received
        0 forwarded
        0 incoming packets discarded
        946760 incoming packets delivered
        18743456 requests sent out
        101 fragments dropped after timeout
        282077129 reassemblies required
        944952 packets reassembled ok
        262734239 packet reassembles failed
   (The numbers/stats above are somewhat better re:
    reassemblies vs a kernel without this patchset. More
    comprehensive performance testing TBD).

Reported-by: Jann Horn <jannh@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index f4272a29dc44..b86d14528188 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -75,7 +75,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;
+	struct sk_buff		*fragments;  /* Used in IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;
-- 
cgit