From e70a3aad44cc8b24986687ffc98c4a4f6ecf25ea Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 2 Aug 2018 23:20:38 -0700
Subject: ipv6: fix double refcount of fib6_metrics

All the callers of ip6_rt_copy_init()/rt6_set_from() hold refcnt
of the "from" fib6_info, so there is no need to hold fib6_metrics
refcnt again, because fib6_metrics refcnt is only released when
fib6_info is gone, that is, they have the same life time, so the
whole fib6_metrics refcnt can be removed actually.

This fixes a kmemleak warning reported by Sabrina.

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Cc: Sabrina Dubroca <sd@queasysnail.net>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ec18b3ce8b6d..7208c16302f6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -978,10 +978,6 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 	rt->rt6i_flags &= ~RTF_EXPIRES;
 	rcu_assign_pointer(rt->from, from);
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
-	if (from->fib6_metrics != &dst_default_metrics) {
-		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
-		refcount_inc(&from->fib6_metrics->refcnt);
-	}
 }
 
 /* Caller must already hold reference to @ort */
-- 
cgit 


From 82a40777de12728dedf4075453b694f0d1baee80 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 5 Aug 2018 22:46:07 +0800
Subject: ip6_tunnel: use the right value for ipv4 min mtu check in
 ip6_tnl_xmit

According to RFC791, 68 bytes is the minimum size of IPv4 datagram every
device must be able to forward without further fragmentation while 576
bytes is the minimum size of IPv4 datagram every device has to be able
to receive, so in ip6_tnl_xmit(), 68(IPV4_MIN_MTU) should be the right
value for the ipv4 min mtu check in ip6_tnl_xmit.

While at it, change to use max() instead of if statement.

Fixes: c9fefa08190f ("ip6_tunnel: get the min mtu properly in ip6_tnl_xmit")
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 00e138a44cbb..1cc9650af9fb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1133,12 +1133,8 @@ route_lookup:
 		max_headroom += 8;
 		mtu -= 8;
 	}
-	if (skb->protocol == htons(ETH_P_IPV6)) {
-		if (mtu < IPV6_MIN_MTU)
-			mtu = IPV6_MIN_MTU;
-	} else if (mtu < 576) {
-		mtu = 576;
-	}
+	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
+		       IPV6_MIN_MTU : IPV4_MIN_MTU);
 
 	skb_dst_update_pmtu(skb, mtu);
 	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
-- 
cgit 


From 4576cd469d980317c4edd9173f8b694aa71ea3a3 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 6 Aug 2018 10:38:34 -0400
Subject: packet: refine ring v3 block size test to hold one frame

TPACKET_V3 stores variable length frames in fixed length blocks.
Blocks must be able to store a block header, optional private space
and at least one minimum sized frame.

Frames, even for a zero snaplen packet, store metadata headers and
optional reserved space.

In the block size bounds check, ensure that the frame of the
chosen configuration fits. This includes sockaddr_ll and optional
tp_reserve.

Syzbot was able to construct a ring with insuffient room for the
sockaddr_ll in the header of a zero-length frame, triggering an
out-of-bounds write in dev_parse_header.

Convert the comparison to less than, as zero is a valid snap len.
This matches the test for minimum tp_frame_size immediately below.

Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.")
Fixes: eb73190f4fbe ("net/packet: refine check for priv area size")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9b27d0cd766d..e6445d8f3f57 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4226,6 +4226,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 
 	if (req->tp_block_nr) {
+		unsigned int min_frame_size;
+
 		/* Sanity tests and some calculations */
 		err = -EBUSY;
 		if (unlikely(rb->pg_vec))
@@ -4248,12 +4250,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 			goto out;
+		min_frame_size = po->tp_hdrlen + po->tp_reserve;
 		if (po->tp_version >= TPACKET_V3 &&
-		    req->tp_block_size <=
-		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr))
+		    req->tp_block_size <
+		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
 			goto out;
-		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
-					po->tp_reserve))
+		if (unlikely(req->tp_frame_size < min_frame_size))
 			goto out;
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			goto out;
-- 
cgit 


From 455f05ecd2b219e9a216050796d30c830d9bc393 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Aug 2018 11:06:02 -0700
Subject: vsock: split dwork to avoid reinitializations

syzbot reported that we reinitialize an active delayed
work in vsock_stream_connect():

	ODEBUG: init active (active state 0) object type: timer_list hint:
	delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414
	WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329
	debug_print_object+0x16a/0x210 lib/debugobjects.c:326

The pattern is apparently wrong, we should only initialize
the dealyed work once and could repeatly schedule it. So we
have to move out the initializations to allocation side.
And to avoid confusion, we can split the shared dwork
into two, instead of re-using the same one.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reported-by: <syzbot+8a9b1bd330476a4f3db6@syzkaller.appspotmail.com>
Cc: Andy king <acking@vmware.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c       | 15 ++++++++-------
 net/vmw_vsock/vmci_transport.c |  3 +--
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c19b858..ab27a2872935 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
 	return transport->shutdown(vsock_sk(sk), mode);
 }
 
-void vsock_pending_work(struct work_struct *work)
+static void vsock_pending_work(struct work_struct *work)
 {
 	struct sock *sk;
 	struct sock *listener;
 	struct vsock_sock *vsk;
 	bool cleanup;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, pending_work.work);
 	sk = sk_vsock(vsk);
 	listener = vsk->listener;
 	cleanup = true;
@@ -498,7 +498,6 @@ out:
 	sock_put(sk);
 	sock_put(listener);
 }
-EXPORT_SYMBOL_GPL(vsock_pending_work);
 
 /**** SOCKET OPERATIONS ****/
 
@@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 	return retval;
 }
 
+static void vsock_connect_timeout(struct work_struct *work);
+
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
@@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *net,
 	vsk->sent_request = false;
 	vsk->ignore_connecting_rst = false;
 	vsk->peer_shutdown = 0;
+	INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
+	INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
 
 	psk = parent ? vsock_sk(parent) : NULL;
 	if (parent) {
@@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct work_struct *work)
 	struct vsock_sock *vsk;
 	int cancel = 0;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, connect_work.work);
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
@@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			 * timeout fires.
 			 */
 			sock_hold(sk);
-			INIT_DELAYED_WORK(&vsk->dwork,
-					  vsock_connect_timeout);
-			schedule_delayed_work(&vsk->dwork, timeout);
+			schedule_delayed_work(&vsk->connect_work, timeout);
 
 			/* Skip ahead to preserve error code set above. */
 			goto out_wait;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a7a73ffe675b..cb332adb84cd 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vpending->listener = sk;
 	sock_hold(sk);
 	sock_hold(pending);
-	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
-	schedule_delayed_work(&vpending->dwork, HZ);
+	schedule_delayed_work(&vpending->pending_work, HZ);
 
 out:
 	return err;
-- 
cgit 


From 37436d9c0e8f62c3eebe204ff5776ff31fd64658 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 7 Aug 2018 15:52:32 +0800
Subject: tipc: fix an interrupt unsafe locking scenario

Commit 9faa89d4ed9d ("tipc: make function tipc_net_finalize() thread
safe") tries to make it thread safe to set node address, so it uses
node_list_lock lock to serialize the whole process of setting node
address in tipc_net_finalize(). But it causes the following interrupt
unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  rht_deferred_worker()
  rhashtable_rehash_table()
  lock(&(&ht->lock)->rlock)
			       tipc_nl_compat_doit()
                               tipc_net_finalize()
                               local_irq_disable();
                               lock(&(&tn->node_list_lock)->rlock);
                               tipc_sk_reinit()
                               rhashtable_walk_enter()
                               lock(&(&ht->lock)->rlock);
  <Interrupt>
  tipc_disc_rcv()
  tipc_node_check_dest()
  tipc_node_create()
  lock(&(&tn->node_list_lock)->rlock);

 *** DEADLOCK ***

When rhashtable_rehash_table() holds ht->lock on CPU0, it doesn't
disable BH. So if an interrupt happens after the lock, it can create
an inverse lock ordering between ht->lock and tn->node_list_lock. As
a consequence, deadlock might happen.

The reason causing the inverse lock ordering scenario above is because
the initial purpose of node_list_lock is not designed to do the
serialization of node address setting.

As cmpxchg() can guarantee CAS (compare-and-swap) process is atomic,
we use it to replace node_list_lock to ensure setting node address can
be atomically finished. It turns out the potential deadlock can be
avoided as well.

Fixes: 9faa89d4ed9d ("tipc: make function tipc_net_finalize() thread safe")
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <maloy@donjonn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/net.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/net.c b/net/tipc/net.c
index a7f6964c3a4b..62199cf5a56c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -123,15 +123,13 @@ void tipc_net_finalize(struct net *net, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
 
-	spin_lock_bh(&tn->node_list_lock);
-	if (!tipc_own_addr(net)) {
+	if (!cmpxchg(&tn->node_addr, 0, addr)) {
 		tipc_set_node_addr(net, addr);
 		tipc_named_reinit(net);
 		tipc_sk_reinit(net);
 		tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 				     TIPC_CLUSTER_SCOPE, 0, addr);
 	}
-	spin_unlock_bh(&tn->node_list_lock);
 }
 
 void tipc_net_stop(struct net *net)
-- 
cgit 


From 61ef4b07fcdc30535889990cf4229766502561cf Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Tue, 7 Aug 2018 20:03:57 +0300
Subject: dccp: fix undefined behavior with 'cwnd' shift in
 ccid2_cwnd_restart()

The shift of 'cwnd' with '(now - hc->tx_lsndtime) / hc->tx_rto' value
can lead to undefined behavior [1].

In order to fix this use a gradual shift of the window with a 'while'
loop, similar to what tcp_cwnd_restart() is doing.

When comparing delta and RTO there is a minor difference between TCP
and DCCP, the last one also invokes dccp_cwnd_restart() and reduces
'cwnd' if delta equals RTO. That case is preserved in this change.

[1]:
[40850.963623] UBSAN: Undefined behaviour in net/dccp/ccids/ccid2.c:237:7
[40851.043858] shift exponent 67 is too large for 32-bit type 'unsigned int'
[40851.127163] CPU: 3 PID: 15940 Comm: netstress Tainted: G        W   E     4.18.0-rc7.x86_64 #1
...
[40851.377176] Call Trace:
[40851.408503]  dump_stack+0xf1/0x17b
[40851.451331]  ? show_regs_print_info+0x5/0x5
[40851.503555]  ubsan_epilogue+0x9/0x7c
[40851.548363]  __ubsan_handle_shift_out_of_bounds+0x25b/0x2b4
[40851.617109]  ? __ubsan_handle_load_invalid_value+0x18f/0x18f
[40851.686796]  ? xfrm4_output_finish+0x80/0x80
[40851.739827]  ? lock_downgrade+0x6d0/0x6d0
[40851.789744]  ? xfrm4_prepare_output+0x160/0x160
[40851.845912]  ? ip_queue_xmit+0x810/0x1db0
[40851.895845]  ? ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40851.963530]  ccid2_hc_tx_packet_sent+0xd36/0x10a0 [dccp]
[40852.029063]  dccp_xmit_packet+0x1d3/0x720 [dccp]
[40852.086254]  dccp_write_xmit+0x116/0x1d0 [dccp]
[40852.142412]  dccp_sendmsg+0x428/0xb20 [dccp]
[40852.195454]  ? inet_dccp_listen+0x200/0x200 [dccp]
[40852.254833]  ? sched_clock+0x5/0x10
[40852.298508]  ? sched_clock+0x5/0x10
[40852.342194]  ? inet_create+0xdf0/0xdf0
[40852.388988]  sock_sendmsg+0xd9/0x160
...

Fixes: 113ced1f52e5 ("dccp ccid-2: Perform congestion-window validation")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 2b75df469220..842a9c7c73a3 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -229,14 +229,16 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	u32 cwnd = hc->tx_cwnd, restart_cwnd,
 	    iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
+	s32 delta = now - hc->tx_lsndtime;
 
 	hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
 
 	/* don't reduce cwnd below the initial window (IW) */
 	restart_cwnd = min(cwnd, iwnd);
-	cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
-	hc->tx_cwnd = max(cwnd, restart_cwnd);
 
+	while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
+		cwnd >>= 1;
+	hc->tx_cwnd = max(cwnd, restart_cwnd);
 	hc->tx_cwnd_stamp = now;
 	hc->tx_cwnd_used  = 0;
 
-- 
cgit 


From 0dcb82254d65f72333aa50ad626d1e9665ad093b Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 7 Aug 2018 12:41:38 -0700
Subject: llc: use refcount_inc_not_zero() for llc_sap_find()

llc_sap_put() decreases the refcnt before deleting sap
from the global list. Therefore, there is a chance
llc_sap_find() could find a sap with zero refcnt
in this global list.

Close this race condition by checking if refcnt is zero
or not in llc_sap_find(), if it is zero then it is being
removed so we can just treat it as gone.

Reported-by: <syzbot+278893f3f7803871f7ce@syzkaller.appspotmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 89041260784c..260b3dc1b4a2 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value)
 
 	rcu_read_lock_bh();
 	sap = __llc_sap_find(sap_value);
-	if (sap)
-		llc_sap_hold(sap);
+	if (!sap || !llc_sap_hold_safe(sap))
+		sap = NULL;
 	rcu_read_unlock_bh();
 	return sap;
 }
-- 
cgit 


From 330bdcfadceea5e9a1526d731711e163f9a90975 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 8 Aug 2018 11:30:02 +0100
Subject: rxrpc: Fix the keepalive generator [ver #2]

AF_RXRPC has a keepalive message generator that generates a message for a
peer ~20s after the last transmission to that peer to keep firewall ports
open.  The implementation is incorrect in the following ways:

 (1) It mixes up ktime_t and time64_t types.

 (2) It uses ktime_get_real(), the output of which may jump forward or
     backward due to adjustments to the time of day.

 (3) If the current time jumps forward too much or jumps backwards, the
     generator function will crank the base of the time ring round one slot
     at a time (ie. a 1s period) until it catches up, spewing out VERSION
     packets as it goes.

Fix the problem by:

 (1) Only using time64_t.  There's no need for sub-second resolution.

 (2) Use ktime_get_seconds() rather than ktime_get_real() so that time
     isn't perceived to go backwards.

 (3) Simplifying rxrpc_peer_keepalive_worker() by splitting it into two
     parts:

     (a) The "worker" function that manages the buckets and the timer.

     (b) The "dispatch" function that takes the pending peers and
     	 potentially transmits a keepalive packet before putting them back
     	 in the ring into the slot appropriate to the revised last-Tx time.

 (4) Taking everything that's pending out of the ring and splicing it into
     a temporary collector list for processing.

     In the case that there's been a significant jump forward, the ring
     gets entirely emptied and then the time base can be warped forward
     before the peers are processed.

     The warping can't happen if the ring isn't empty because the slot a
     peer is in is keepalive-time dependent, relative to the base time.

 (5) Limit the number of iterations of the bucket array when scanning it.

 (6) Set the timer to skip any empty slots as there's no point waking up if
     there's nothing to do yet.

This can be triggered by an incoming call from a server after a reboot with
AF_RXRPC and AFS built into the kernel causing a peer record to be set up
before userspace is started.  The system clock is then adjusted by
userspace, thereby potentially causing the keepalive generator to have a
meltdown - which leads to a message like:

	watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/0:1:23]
	...
	Workqueue: krxrpcd rxrpc_peer_keepalive_worker
	EIP: lock_acquire+0x69/0x80
	...
	Call Trace:
	 ? rxrpc_peer_keepalive_worker+0x5e/0x350
	 ? _raw_spin_lock_bh+0x29/0x60
	 ? rxrpc_peer_keepalive_worker+0x5e/0x350
	 ? rxrpc_peer_keepalive_worker+0x5e/0x350
	 ? __lock_acquire+0x3d3/0x870
	 ? process_one_work+0x110/0x340
	 ? process_one_work+0x166/0x340
	 ? process_one_work+0x110/0x340
	 ? worker_thread+0x39/0x3c0
	 ? kthread+0xdb/0x110
	 ? cancel_delayed_work+0x90/0x90
	 ? kthread_stop+0x70/0x70
	 ? ret_from_fork+0x19/0x24

Fixes: ace45bec6d77 ("rxrpc: Fix firewall route keepalive")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h |   8 +--
 net/rxrpc/conn_event.c  |   4 +-
 net/rxrpc/net_ns.c      |   6 +-
 net/rxrpc/output.c      |  12 ++--
 net/rxrpc/peer_event.c  | 156 +++++++++++++++++++++++++++---------------------
 net/rxrpc/peer_object.c |   8 +--
 net/rxrpc/rxkad.c       |   4 +-
 7 files changed, 109 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5fb7d3254d9e..707630ab4713 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -104,9 +104,9 @@ struct rxrpc_net {
 
 #define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
 	u8			peer_keepalive_cursor;
-	ktime_t			peer_keepalive_base;
-	struct hlist_head	peer_keepalive[RXRPC_KEEPALIVE_TIME + 1];
-	struct hlist_head	peer_keepalive_new;
+	time64_t		peer_keepalive_base;
+	struct list_head	peer_keepalive[32];
+	struct list_head	peer_keepalive_new;
 	struct timer_list	peer_keepalive_timer;
 	struct work_struct	peer_keepalive_work;
 };
@@ -295,7 +295,7 @@ struct rxrpc_peer {
 	struct hlist_head	error_targets;	/* targets for net error distribution */
 	struct work_struct	error_distributor;
 	struct rb_root		service_conns;	/* Service connections */
-	struct hlist_node	keepalive_link;	/* Link in net->peer_keepalive[] */
+	struct list_head	keepalive_link;	/* Link in net->peer_keepalive[] */
 	time64_t		last_tx_at;	/* Last time packet sent here */
 	seqlock_t		service_conn_lock;
 	spinlock_t		lock;		/* access lock */
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 8229a52c2acd..3fde001fcc39 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -136,7 +136,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	}
 
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 	if (ret < 0)
 		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
 				    rxrpc_tx_fail_call_final_resend);
@@ -245,7 +245,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 		return -EAGAIN;
 	}
 
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 
 	_leave(" = 0");
 	return 0;
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5d6a773db973..417d80867c4f 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -85,12 +85,12 @@ static __net_init int rxrpc_init_net(struct net *net)
 	hash_init(rxnet->peer_hash);
 	spin_lock_init(&rxnet->peer_hash_lock);
 	for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
-		INIT_HLIST_HEAD(&rxnet->peer_keepalive[i]);
-	INIT_HLIST_HEAD(&rxnet->peer_keepalive_new);
+		INIT_LIST_HEAD(&rxnet->peer_keepalive[i]);
+	INIT_LIST_HEAD(&rxnet->peer_keepalive_new);
 	timer_setup(&rxnet->peer_keepalive_timer,
 		    rxrpc_peer_keepalive_timeout, 0);
 	INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
-	rxnet->peer_keepalive_base = ktime_add(ktime_get_real(), NSEC_PER_SEC);
+	rxnet->peer_keepalive_base = ktime_get_seconds();
 
 	ret = -ENOMEM;
 	rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f03de1c59ba3..4774c8f5634d 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -209,7 +209,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	now = ktime_get_real();
 	if (ping)
 		call->ping_time = now;
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
 				    rxrpc_tx_fail_call_ack);
@@ -296,7 +296,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 
 	ret = kernel_sendmsg(conn->params.local->socket,
 			     &msg, iov, 1, sizeof(pkt));
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
 				    rxrpc_tx_fail_call_abort);
@@ -391,7 +391,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	 *     message and update the peer record
 	 */
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 
 	up_read(&conn->params.local->defrag_sem);
 	if (ret < 0)
@@ -457,7 +457,7 @@ send_fragmentable:
 		if (ret == 0) {
 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
 					     iov, 2, len);
-			conn->params.peer->last_tx_at = ktime_get_real();
+			conn->params.peer->last_tx_at = ktime_get_seconds();
 
 			opt = IP_PMTUDISC_DO;
 			kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -475,7 +475,7 @@ send_fragmentable:
 		if (ret == 0) {
 			ret = kernel_sendmsg(conn->params.local->socket, &msg,
 					     iov, 2, len);
-			conn->params.peer->last_tx_at = ktime_get_real();
+			conn->params.peer->last_tx_at = ktime_get_seconds();
 
 			opt = IPV6_PMTUDISC_DO;
 			kernel_setsockopt(conn->params.local->socket,
@@ -599,6 +599,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
 		trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
 				    rxrpc_tx_fail_version_keepalive);
 
-	peer->last_tx_at = ktime_get_real();
+	peer->last_tx_at = ktime_get_seconds();
 	_leave("");
 }
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 0ed8b651cec2..4f9da2f51c69 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -350,97 +350,117 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
 }
 
 /*
- * Perform keep-alive pings with VERSION packets to keep any NAT alive.
+ * Perform keep-alive pings.
  */
-void rxrpc_peer_keepalive_worker(struct work_struct *work)
+static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
+					  struct list_head *collector,
+					  time64_t base,
+					  u8 cursor)
 {
-	struct rxrpc_net *rxnet =
-		container_of(work, struct rxrpc_net, peer_keepalive_work);
 	struct rxrpc_peer *peer;
-	unsigned long delay;
-	ktime_t base, now = ktime_get_real();
-	s64 diff;
-	u8 cursor, slot;
+	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
+	time64_t keepalive_at;
+	int slot;
 
-	base = rxnet->peer_keepalive_base;
-	cursor = rxnet->peer_keepalive_cursor;
+	spin_lock_bh(&rxnet->peer_hash_lock);
 
-	_enter("%u,%lld", cursor, ktime_sub(now, base));
+	while (!list_empty(collector)) {
+		peer = list_entry(collector->next,
+				  struct rxrpc_peer, keepalive_link);
 
-next_bucket:
-	diff = ktime_to_ns(ktime_sub(now, base));
-	if (diff < 0)
-		goto resched;
+		list_del_init(&peer->keepalive_link);
+		if (!rxrpc_get_peer_maybe(peer))
+			continue;
 
-	_debug("at %u", cursor);
-	spin_lock_bh(&rxnet->peer_hash_lock);
-next_peer:
-	if (!rxnet->live) {
 		spin_unlock_bh(&rxnet->peer_hash_lock);
-		goto out;
-	}
 
-	/* Everything in the bucket at the cursor is processed this second; the
-	 * bucket at cursor + 1 goes now + 1s and so on...
-	 */
-	if (hlist_empty(&rxnet->peer_keepalive[cursor])) {
-		if (hlist_empty(&rxnet->peer_keepalive_new)) {
-			spin_unlock_bh(&rxnet->peer_hash_lock);
-			goto emptied_bucket;
+		keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
+		slot = keepalive_at - base;
+		_debug("%02x peer %u t=%d {%pISp}",
+		       cursor, peer->debug_id, slot, &peer->srx.transport);
+
+		if (keepalive_at <= base ||
+		    keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
+			rxrpc_send_keepalive(peer);
+			slot = RXRPC_KEEPALIVE_TIME;
 		}
 
-		hlist_move_list(&rxnet->peer_keepalive_new,
-				&rxnet->peer_keepalive[cursor]);
+		/* A transmission to this peer occurred since last we examined
+		 * it so put it into the appropriate future bucket.
+		 */
+		slot += cursor;
+		slot &= mask;
+		spin_lock_bh(&rxnet->peer_hash_lock);
+		list_add_tail(&peer->keepalive_link,
+			      &rxnet->peer_keepalive[slot & mask]);
+		rxrpc_put_peer(peer);
 	}
 
-	peer = hlist_entry(rxnet->peer_keepalive[cursor].first,
-			   struct rxrpc_peer, keepalive_link);
-	hlist_del_init(&peer->keepalive_link);
-	if (!rxrpc_get_peer_maybe(peer))
-		goto next_peer;
-
 	spin_unlock_bh(&rxnet->peer_hash_lock);
+}
 
-	_debug("peer %u {%pISp}", peer->debug_id, &peer->srx.transport);
+/*
+ * Perform keep-alive pings with VERSION packets to keep any NAT alive.
+ */
+void rxrpc_peer_keepalive_worker(struct work_struct *work)
+{
+	struct rxrpc_net *rxnet =
+		container_of(work, struct rxrpc_net, peer_keepalive_work);
+	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
+	time64_t base, now, delay;
+	u8 cursor, stop;
+	LIST_HEAD(collector);
 
-recalc:
-	diff = ktime_divns(ktime_sub(peer->last_tx_at, base), NSEC_PER_SEC);
-	if (diff < -30 || diff > 30)
-		goto send; /* LSW of 64-bit time probably wrapped on 32-bit */
-	diff += RXRPC_KEEPALIVE_TIME - 1;
-	if (diff < 0)
-		goto send;
+	now = ktime_get_seconds();
+	base = rxnet->peer_keepalive_base;
+	cursor = rxnet->peer_keepalive_cursor;
+	_enter("%lld,%u", base - now, cursor);
 
-	slot = (diff > RXRPC_KEEPALIVE_TIME - 1) ? RXRPC_KEEPALIVE_TIME - 1 : diff;
-	if (slot == 0)
-		goto send;
+	if (!rxnet->live)
+		return;
 
-	/* A transmission to this peer occurred since last we examined it so
-	 * put it into the appropriate future bucket.
+	/* Remove to a temporary list all the peers that are currently lodged
+	 * in expired buckets plus all new peers.
+	 *
+	 * Everything in the bucket at the cursor is processed this
+	 * second; the bucket at cursor + 1 goes at now + 1s and so
+	 * on...
 	 */
-	slot = (slot + cursor) % ARRAY_SIZE(rxnet->peer_keepalive);
 	spin_lock_bh(&rxnet->peer_hash_lock);
-	hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive[slot]);
-	rxrpc_put_peer(peer);
-	goto next_peer;
-
-send:
-	rxrpc_send_keepalive(peer);
-	now = ktime_get_real();
-	goto recalc;
+	list_splice_init(&rxnet->peer_keepalive_new, &collector);
+
+	stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
+	while (base <= now && (s8)(cursor - stop) < 0) {
+		list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
+				      &collector);
+		base++;
+		cursor++;
+	}
 
-emptied_bucket:
-	cursor++;
-	if (cursor >= ARRAY_SIZE(rxnet->peer_keepalive))
-		cursor = 0;
-	base = ktime_add_ns(base, NSEC_PER_SEC);
-	goto next_bucket;
+	base = now;
+	spin_unlock_bh(&rxnet->peer_hash_lock);
 
-resched:
 	rxnet->peer_keepalive_base = base;
 	rxnet->peer_keepalive_cursor = cursor;
-	delay = nsecs_to_jiffies(-diff) + 1;
-	timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
-out:
+	rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
+	ASSERT(list_empty(&collector));
+
+	/* Schedule the timer for the next occupied timeslot. */
+	cursor = rxnet->peer_keepalive_cursor;
+	stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
+	for (; (s8)(cursor - stop) < 0; cursor++) {
+		if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
+			break;
+		base++;
+	}
+
+	now = ktime_get_seconds();
+	delay = base - now;
+	if (delay < 1)
+		delay = 1;
+	delay *= HZ;
+	if (rxnet->live)
+		timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
+
 	_leave("");
 }
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 1b7e8107b3ae..24ec7cdcf332 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -322,7 +322,7 @@ struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
 	if (!peer) {
 		peer = prealloc;
 		hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
-		hlist_add_head(&peer->keepalive_link, &rxnet->peer_keepalive_new);
+		list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
 	}
 
 	spin_unlock(&rxnet->peer_hash_lock);
@@ -367,8 +367,8 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
 		if (!peer) {
 			hash_add_rcu(rxnet->peer_hash,
 				     &candidate->hash_link, hash_key);
-			hlist_add_head(&candidate->keepalive_link,
-				       &rxnet->peer_keepalive_new);
+			list_add_tail(&candidate->keepalive_link,
+				      &rxnet->peer_keepalive_new);
 		}
 
 		spin_unlock_bh(&rxnet->peer_hash_lock);
@@ -441,7 +441,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
 
 	spin_lock_bh(&rxnet->peer_hash_lock);
 	hash_del_rcu(&peer->hash_link);
-	hlist_del_init(&peer->keepalive_link);
+	list_del_init(&peer->keepalive_link);
 	spin_unlock_bh(&rxnet->peer_hash_lock);
 
 	kfree_rcu(peer, rcu);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 278ac0807a60..47cb019c521a 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -669,7 +669,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 		return -EAGAIN;
 	}
 
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 	_leave(" = 0");
 	return 0;
 }
@@ -725,7 +725,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
 		return -EAGAIN;
 	}
 
-	conn->params.peer->last_tx_at = ktime_get_real();
+	conn->params.peer->last_tx_at = ktime_get_seconds();
 	_leave(" = 0");
 	return 0;
 }
-- 
cgit 


From caa21e19e08d7a1445116a93f7ab4e187ebbbadb Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Wed, 8 Aug 2018 14:13:19 +0200
Subject: net/smc: no shutdown in state SMC_LISTEN

Invoking shutdown for a socket in state SMC_LISTEN does not make
sense. Nevertheless programs like syzbot fuzzing the kernel may
try to do this. For SMC this means a socket refcounting problem.
This patch makes sure a shutdown call for an SMC socket in state
SMC_LISTEN simply returns with -ENOTCONN.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 05e4ffe5aabd..1288c7bf40d5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1397,8 +1397,7 @@ static int smc_shutdown(struct socket *sock, int how)
 	lock_sock(sk);
 
 	rc = -ENOTCONN;
-	if ((sk->sk_state != SMC_LISTEN) &&
-	    (sk->sk_state != SMC_ACTIVE) &&
+	if ((sk->sk_state != SMC_ACTIVE) &&
 	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
 	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
 	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
-- 
cgit 


From bd58c7e0860f54710907903ed6daff699d1fc5b9 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Wed, 8 Aug 2018 14:13:20 +0200
Subject: net/smc: allow sysctl rmem and wmem defaults for servers

Without setsockopt SO_SNDBUF and SO_RCVBUF settings, the sysctl
defaults net.ipv4.tcp_wmem and net.ipv4.tcp_rmem should be the base
for the sizes of the SMC sndbuf and rcvbuf. Any TCP buffer size
optimizations for servers should be ignored.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1288c7bf40d5..0ee7721afbe5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1122,6 +1122,8 @@ static void smc_tcp_listen_work(struct work_struct *work)
 		sock_hold(lsk); /* sock_put in smc_listen_work */
 		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
 		smc_copy_sock_settings_to_smc(new_smc);
+		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
+		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
 		sock_hold(&new_smc->sk); /* sock_put in passive closing */
 		if (!schedule_work(&new_smc->smc_listen_work))
 			sock_put(&new_smc->sk);
-- 
cgit 


From 7311d665ca68907b9c43d6d1021f816f9a7bbd57 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Wed, 8 Aug 2018 14:13:21 +0200
Subject: net/smc: move sock lock in smc_ioctl()

When an SMC socket is connecting it is decided whether fallback to
TCP is needed. To avoid races between connect and ioctl move the
sock lock before the use_fallback check.

Reported-by: syzbot+5b2cece1a8ecb2ca77d8@syzkaller.appspotmail.com
Reported-by: syzbot+19557374321ca3710990@syzkaller.appspotmail.com
Fixes: 1992d99882af ("net/smc: take sock lock in smc_ioctl()")
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 0ee7721afbe5..e7de5f282722 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1522,12 +1522,16 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
 
 	smc = smc_sk(sock->sk);
 	conn = &smc->conn;
+	lock_sock(&smc->sk);
 	if (smc->use_fallback) {
-		if (!smc->clcsock)
+		if (!smc->clcsock) {
+			release_sock(&smc->sk);
 			return -EBADF;
-		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
+		}
+		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
+		release_sock(&smc->sk);
+		return answ;
 	}
-	lock_sock(&smc->sk);
 	switch (cmd) {
 	case SIOCINQ: /* same as FIONREAD */
 		if (smc->sk.sk_state == SMC_LISTEN) {
-- 
cgit 


From 1be52e97ed3e524f82e25d6e53f48df3c6e85282 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 8 Aug 2018 20:56:40 +0200
Subject: dsa: slave: eee: Allow ports to use phylink

For a port to be able to use EEE, both the MAC and the PHY must
support EEE. A phy can be provided by both a phydev or phylink. Verify
at least one of these exist, not just phydev.

Fixes: aab9c4067d23 ("net: dsa: Plug in PHYLINK support")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 732369c80644..9864bcd3d317 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -639,7 +639,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	int ret;
 
 	/* Port's PHY and MAC both need to be EEE capable */
-	if (!dev->phydev)
+	if (!dev->phydev && !dp->pl)
 		return -ENODEV;
 
 	if (!ds->ops->set_mac_eee)
@@ -659,7 +659,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
 	int ret;
 
 	/* Port's PHY and MAC both need to be EEE capable */
-	if (!dev->phydev)
+	if (!dev->phydev && !dp->pl)
 		return -ENODEV;
 
 	if (!ds->ops->get_mac_eee)
-- 
cgit