From 7dafbab3753fcf59bc81748e5b2c5bf04e1c62c7 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:19:55 -0700 Subject: IB/hfi1: Add functions to parse BTH/IB headers Improve code readablity by adding inline functions to read specific BTH/IB fields without knowledge of byte offsets. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_hdrs.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 ++ include/rdma/rdmavt_qp.h | 2 +- 3 files changed, 87 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 5519f31f043a..c124d515f7d5 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) #define IB_LNH_MASK 3 #define IB_SC_MASK 0xf #define IB_SC_SHIFT 12 +#define IB_SC5_MASK 0x10 #define IB_SL_MASK 0xf #define IB_SL_SHIFT 4 +#define IB_SL_SHIFT 4 +#define IB_LVER_MASK 0xf +#define IB_LVER_SHIFT 8 static inline u8 ib_get_lnh(struct ib_header *hdr) { @@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr) return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); } +static inline bool ib_is_sc5(u16 sc5) +{ + return !!(sc5 & IB_SC5_MASK); +} + static inline u8 ib_get_sl(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); @@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr) return (be16_to_cpu(hdr->lrh[3])); } +static inline u8 ib_get_lver(struct ib_header *hdr) +{ + return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & + IB_LVER_MASK); +} + +static inline u16 ib_get_len(struct ib_header *hdr) +{ + return (u16)(be16_to_cpu(hdr->lrh[2])); +} + +static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) +{ + return be32_to_cpu(ohdr->u.ud.deth[0]); +} + +static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK); +} + /* * BTH */ @@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr) #define IB_BTH_PAD_MASK 3 #define IB_BTH_PKEY_MASK 0xffff #define IB_BTH_PAD_SHIFT 20 +#define IB_BTH_A_MASK 1 +#define IB_BTH_A_SHIFT 31 +#define IB_BTH_M_MASK 1 +#define IB_BTH_M_SHIFT 22 +#define IB_BTH_SE_MASK 1 +#define IB_BTH_SE_SHIFT 23 +#define IB_BTH_TVER_MASK 0xf +#define IB_BTH_TVER_SHIFT 16 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { @@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) IB_BTH_OPCODE_MASK); } +static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & + IB_BTH_A_MASK); +} + +static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & + IB_BTH_M_MASK); +} + +static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & + IB_BTH_SE_MASK); +} + +static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr) +{ + return (u32)(be32_to_cpu(ohdr->bth[2])); +} + +static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr) +{ + return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK); +} + +static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK); +} + +static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK); +} + +static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) & + IB_BTH_TVER_MASK); +} + #endif /* IB_HDRS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ba8314ec5768..8f1ce4e27bbd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -664,6 +664,8 @@ union rdma_network_hdr { }; }; +#define IB_QPN_MASK 0xFFFFFF + enum { IB_MULTICAST_QPN = 0xffffff }; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index be6472e5b06b..13f43b3527a8 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -396,7 +396,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) -#define RVT_QPN_MASK 0xFFFFFF +#define RVT_QPN_MASK IB_QPN_MASK /* * QPN-map pages start out as NULL, they get allocated upon -- cgit From 14fe13fcd3afb96b06809f280b586be1c998332c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:20:31 -0700 Subject: IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok() SGEs that are contiguous needlessly consume driver dependent TX resources. The lkey validation logic is enhanced to compress the SGE that ends up in the send wqe when consecutive addresses are detected. The lkey validation API used to return 1 (success) or 0 (fail). The return value is now an -errno, 0 (compressed), or 1 (uncompressed). A additional argument is added to pass the last SQE for the compression. Loopback callers always pass a NULL to last_sge since the optimization is of little benefit in that situation. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..d0b9f91e5f4d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc); + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, u16 lid); -- cgit From cb49366f3616fdf197893c24a5b2677b8c26ce29 Mon Sep 17 00:00:00 2001 From: "Vishwanathapura, Niranjana" Date: Thu, 1 Jun 2017 17:04:02 -0700 Subject: IB/core,rdmavt,hfi1,opa-vnic: Send OPA cap_mask3 in trap Provide the ability for IB clients to modify the OPA specific capability mask and include this mask in the subsequent trap data. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Michael N. Henry Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 3 ++- include/rdma/rdma_vt.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8f1ce4e27bbd..9d4d2a74c95e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -577,7 +577,8 @@ struct ib_device_modify { enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), - IB_PORT_RESET_QKEY_CNTR = (1<<3) + IB_PORT_RESET_QKEY_CNTR = (1<<3), + IB_PORT_OPA_MASK_CHG = (1<<4) }; struct ib_port_modify { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d0b9f91e5f4d..0f18ffd98dd7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -75,6 +75,7 @@ struct rvt_ibport { __be64 mkey; u64 tid; u32 port_cap_flags; + u16 port_cap3_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; -- cgit From 250bbbdbed93ca27c9f4d445c960a87d9f7e2044 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 20 Jun 2017 21:52:08 +0200 Subject: iio: common: st_sensors: move st_sensors_of_i2c_probe() in common code Move st_sensors_of_i2c_probe() in st_sensors_core and rename it in st_sensors_of_name_probe(). That change is necessary to add device-tree support in spi code otherwise the rest of the autodetection will fail since spi->modalias (and indio_dev->name) will be set using compatible string value that differs from standard sensor name Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 12 ++++++++++++ include/linux/iio/common/st_sensors_i2c.h | 10 ---------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 497f2b3a5a62..1f8211b6438b 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -325,4 +325,16 @@ ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev, ssize_t st_sensors_sysfs_scale_avail(struct device *dev, struct device_attribute *attr, char *buf); +#ifdef CONFIG_OF +void st_sensors_of_name_probe(struct device *dev, + const struct of_device_id *match, + char *name, int len); +#else +static inline void st_sensors_of_name_probe(struct device *dev, + const struct of_device_id *match, + char *name, int len) +{ +} +#endif + #endif /* ST_SENSORS_H */ diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h index 254de3c7dde8..0a2c25e06d1f 100644 --- a/include/linux/iio/common/st_sensors_i2c.h +++ b/include/linux/iio/common/st_sensors_i2c.h @@ -18,16 +18,6 @@ void st_sensors_i2c_configure(struct iio_dev *indio_dev, struct i2c_client *client, struct st_sensor_data *sdata); -#ifdef CONFIG_OF -void st_sensors_of_i2c_probe(struct i2c_client *client, - const struct of_device_id *match); -#else -static inline void st_sensors_of_i2c_probe(struct i2c_client *client, - const struct of_device_id *match) -{ -} -#endif - #ifdef CONFIG_ACPI int st_sensors_match_acpi_device(struct device *dev); #else -- cgit From 5ea0727b163cb5575e36397a12eade68a1f35f24 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Wed, 14 Jun 2017 18:12:01 -0700 Subject: x86/syscalls: Check address limit on user-mode return Ensure the address limit is a user-mode segment before returning to user-mode. Otherwise a process can corrupt kernel-mode memory and elevate privileges [1]. The set_fs function sets the TIF_SETFS flag to force a slow path on return. In the slow path, the address limit is checked to be USER_DS if needed. The addr_limit_user_check function is added as a cross-architecture function to check the address limit. [1] https://bugs.chromium.org/p/project-zero/issues/detail?id=990 Signed-off-by: Thomas Garnier Signed-off-by: Thomas Gleixner Cc: Mark Rutland Cc: kernel-hardening@lists.openwall.com Cc: Catalin Marinas Cc: Will Deacon Cc: David Howells Cc: Dave Hansen Cc: Miroslav Benes Cc: Chris Metcalf Cc: Pratyush Anand Cc: Russell King Cc: Petr Mladek Cc: Rik van Riel Cc: Kees Cook Cc: Arnd Bergmann Cc: Al Viro Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: linux-arm-kernel@lists.infradead.org Cc: Will Drewry Cc: linux-api@vger.kernel.org Cc: Oleg Nesterov Cc: Andy Lutomirski Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/20170615011203.144108-1-thgarnie@google.com --- include/linux/syscalls.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 980c3c9b06f8..ac0cf6fb25d6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -206,6 +206,22 @@ extern struct trace_event_functions exit_syscall_print_funcs; } \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) +#ifdef TIF_FSCHECK +/* + * Called before coming back to user-mode. Returning to user-mode with an + * address limit different than USER_DS can allow to overwrite kernel memory. + */ +static inline void addr_limit_user_check(void) +{ + + if (!test_thread_flag(TIF_FSCHECK)) + return; + + BUG_ON(!segment_eq(get_fs(), USER_DS)); + clear_thread_flag(TIF_FSCHECK); +} +#endif + asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); asmlinkage long sys_time(time_t __user *tloc); -- cgit From 9b1404c24a357332cb2a6df7c4337e943a4545fd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Jul 2017 10:34:35 -0400 Subject: msgrcv(2), msgsnd(2): move compat to native Signed-off-by: Al Viro --- include/linux/msg.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index f3f302f9c197..4e5ec3cbf464 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -31,12 +31,4 @@ struct msg_queue { struct list_head q_senders; }; -/* Helper routines for sys_msgsnd and sys_msgrcv */ -extern long do_msgsnd(int msqid, long mtype, void __user *mtext, - size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, - int msgflg, - long (*msg_fill)(void __user *, struct msg_msg *, - size_t)); - #endif /* _LINUX_MSG_H */ -- cgit From 44ee454670122a959112caaa7aad86d8cacab1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Jul 2017 10:50:14 -0400 Subject: semtimedop(): move compat to native ... and finally kill the sodding compat_convert_timespec() Signed-off-by: Al Viro --- include/linux/compat.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 5a6a109b4a50..edae425ca8c0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -171,15 +171,6 @@ extern int get_compat_itimerspec64(struct itimerspec64 *its, extern int put_compat_itimerspec64(const struct itimerspec64 *its, struct compat_itimerspec __user *uits); -/* - * This function convert a timespec if necessary and returns a *user - * space* pointer. If no conversion is necessary, it returns the - * initial pointer. NULL is a legitimate argument and will always - * output NULL. - */ -extern int compat_convert_timespec(struct timespec __user **, - const void __user *); - struct compat_iovec { compat_uptr_t iov_base; compat_size_t iov_len; -- cgit From 9d5b86ac13c573795525ecac6ed2db39ab23e2a8 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Sun, 16 Jul 2017 10:28:22 -0400 Subject: fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks Since commit c69899a17ca4 "NFSv4: Update of VFS byte range lock must be atomic with the stateid update", NFSv4 has been inserting locks in rpciod worker context. The result is that the file_lock's fl_nspid is the kworker's pid instead of the original userspace pid. The fl_nspid is only used to represent the namespaced virtual pid number when displaying locks or returning from F_GETLK. There's no reason to set it for every inserted lock, since we can usually just look it up from fl_pid. So, instead of looking up and holding struct pid for every lock, let's just look up the virtual pid number from fl_pid when it is needed. That means we can remove fl_nspid entirely. The translaton and presentation of fl_pid should handle the following four cases: 1 - F_GETLK on a remote file with a remote lock: In this case, the filesystem should determine the l_pid to return here. Filesystems should indicate that the fl_pid represents a non-local pid value that should not be translated by returning an fl_pid <= 0. 2 - F_GETLK on a local file with a remote lock: This should be the l_pid of the lock manager process, and translated. 3 - F_GETLK on a remote file with a local lock, and 4 - F_GETLK on a local file with a local lock: These should be the translated l_pid of the local locking process. Fuse was already doing the correct thing by translating the pid into the caller's namespace. With this change we must update fuse to translate to init's pid namespace, so that the locks API can then translate from init's pid namespace into the pid namespace of the caller. With this change, the locks API will expect that if a filesystem returns a remote pid as opposed to a local pid for F_GETLK, that remote pid will be <= 0. This signifies that the pid is remote, and the locks API will forego translating that pid into the pid namespace of the local calling process. Finally, we convert remote filesystems to present remote pids using negative numbers. Have lustre, 9p, ceph, cifs, and dlm negate the remote pid returned for F_GETLK lock requests. Since local pids will never be larger than PID_MAX_LIMIT (which is currently defined as <= 4 million), but pid_t is an unsigned int, we should have plenty of room to represent remote pids with negative numbers if we assume that remote pid numbers are similarly limited. If this is not the case, then we run the risk of having a remote pid returned for which there is also a corresponding local pid. This is a problem we have now, but this patch should reduce the chances of that occurring, while also returning those remote pid numbers, for whatever that may be worth. Signed-off-by: Benjamin Coddington Signed-off-by: Jeff Layton --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b5d6816542b..f0b108af9b02 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -999,7 +999,6 @@ struct file_lock { unsigned char fl_type; unsigned int fl_pid; int fl_link_cpu; /* what cpu's list is this on? */ - struct pid *fl_nspid; wait_queue_head_t fl_wait; struct file *fl_file; loff_t fl_start; -- cgit From ccdb2d17df9f07c291d43b0aeea7c90e4c020489 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 15 Jul 2017 19:40:20 +0200 Subject: ip6: fix PMTU discovery when using /127 subnets The definition of an "anycast destination address" has been tweaked as a side-effect of commit 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST"). The first address of a point-to-point /127 subnet is now considered as an anycast address. This prevents ICMPv6 errors to be returned to a sender of such a subnet and breaks PMTU discovery. This can be reproduced with: ip link add name out6 type veth peer name in6 ip link add name out7 type veth peer name in7 ip link set mtu 1400 dev out7 ip link set mtu 1400 dev in7 ip netns add next-hop ip netns add next-next-hop ip link set netns next-hop dev in6 ip link set netns next-hop dev out7 ip link set netns next-next-hop dev in7 ip link set up dev out6 ip addr add 2001:db8:1::12/127 dev out6 ip netns exec next-hop ip link set up dev in6 ip netns exec next-hop ip link set up dev out7 ip netns exec next-hop ip addr add 2001:db8:1::13/127 dev in6 ip netns exec next-hop ip addr add 2001:db8:1::14/127 dev out7 ip netns exec next-hop ip route add default via 2001:db8:1::15 ip netns exec next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec next-next-hop ip link set up dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::15/127 dev in7 ip netns exec next-next-hop ip addr add 2001:db8:1::50/128 dev in7 ip netns exec next-next-hop ip route add default via 2001:db8:1::14 ip netns exec next-next-hop sysctl -qw net.ipv6.conf.all.forwarding=1 ip route add 2001:db8:1::48/123 via 2001:db8:1::13 sleep 4 ping -M do -s 1452 -c 3 2001:db8:1::50 || true ip route get 2001:db8:1::50 Before the patch, we get: 2001:db8:1::50 from :: via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 1024 pref medium After the patch, we get: 2001:db8:1::50 via 2001:db8:1::13 dev out6 src 2001:db8:1::12 metric 0 cache expires 578sec mtu 1400 pref medium Fixes: 2647a9b07032 ("ipv6: Remove external dependency on rt6i_gateway and RTF_ANYCAST") Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 199056933dcb..907d39a42f6b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,7 +194,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, struct rt6_info *rt = (struct rt6_info *)dst; return rt->rt6i_flags & RTF_ANYCAST || - (rt->rt6i_dst.plen != 128 && + (rt->rt6i_dst.plen < 127 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } -- cgit From 788b950c62e06b02278a0fd380e1a0667996ce3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 16 Jul 2017 21:43:33 -0400 Subject: cgroup: distinguish local and children populated states cgrp->populated_cnt counts both local (the cgroup's populated css_sets) and subtree proper (populated children) so that it's only zero when the whole subtree, including self, is empty. This patch splits the counter into two so that local and children populated states are tracked separately. It allows finer-grained tests on the state of the hierarchy which will be used to replace css_set walking local populated test. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 13 +++++++++---- include/linux/cgroup.h | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 09f4c7df1478..ae7bc1e70085 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -263,11 +263,16 @@ struct cgroup { /* * Each non-empty css_set associated with this cgroup contributes - * one to populated_cnt. All children with non-zero popuplated_cnt - * of their own contribute one. The count is zero iff there's no - * task in this cgroup or its subtree. + * one to nr_populated_csets. The counter is zero iff this cgroup + * doesn't have any tasks. + * + * All children which have non-zero nr_populated_csets and/or + * nr_populated_children of their own contribute one to + * nr_populated_children. The counter is zero iff this cgroup's + * subtree proper doesn't have any tasks. */ - int populated_cnt; + int nr_populated_csets; + int nr_populated_children; struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 710a005c6b7a..308b10797a54 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -537,7 +537,7 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return cgrp->populated_cnt; + return cgrp->nr_populated_csets + cgrp->nr_populated_children; } /* returns ino associated with a cgroup */ -- cgit From a38905e6aa1a758af003b80f3318196eadb86dfe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:49 +0800 Subject: sctp: remove the typedef sctp_ipv4addr_param_t This patch is to remove the typedef sctp_ipv4addr_param_t, and replace with struct sctp_ipv4addr_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 99e866487e2f..e42095d7ce57 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -273,10 +273,10 @@ struct sctp_init_chunk { /* Section 3.3.2.1. IPv4 Address Parameter (5) */ -typedef struct sctp_ipv4addr_param { +struct sctp_ipv4addr_param { struct sctp_paramhdr param_hdr; - struct in_addr addr; -} sctp_ipv4addr_param_t; + struct in_addr addr; +}; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { -- cgit From 00987cc07e3f0f01699800cd89adf13a908cdee5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:50 +0800 Subject: sctp: remove the typedef sctp_ipv6addr_param_t This patch is to remove the typedef sctp_ipv6addr_param_t, and replace with struct sctp_ipv6addr_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index e42095d7ce57..6b45c8a38642 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -279,10 +279,10 @@ struct sctp_ipv4addr_param { }; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ -typedef struct sctp_ipv6addr_param { +struct sctp_ipv6addr_param { struct sctp_paramhdr param_hdr; struct in6_addr addr; -} sctp_ipv6addr_param_t; +}; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { -- cgit From 365ddb65e77f6b99d4aba09e0d8a096aada57815 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:51 +0800 Subject: sctp: remove the typedef sctp_cookie_preserve_param_t This patch is to remove the typedef sctp_cookie_preserve_param_t, and replace with struct sctp_cookie_preserve_param in the places where it's using this typedef. It is also to fix some indents in sctp_sf_do_5_2_6_stale(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 6b45c8a38642..d8f9d8f8649b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -285,10 +285,10 @@ struct sctp_ipv6addr_param { }; /* Section 3.3.2.1 Cookie Preservative (9) */ -typedef struct sctp_cookie_preserve_param { +struct sctp_cookie_preserve_param { struct sctp_paramhdr param_hdr; - __be32 lifespan_increment; -} sctp_cookie_preserve_param_t; + __be32 lifespan_increment; +}; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { -- cgit From df9af0063f154c1a4f22a5570749d185d080cf56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:52 +0800 Subject: sctp: remove the typedef sctp_hostname_param_t Remove this typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d8f9d8f8649b..c43e9067d41a 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -291,10 +291,10 @@ struct sctp_cookie_preserve_param { }; /* Section 3.3.2.1 Host Name Address (11) */ -typedef struct sctp_hostname_param { +struct sctp_hostname_param { struct sctp_paramhdr param_hdr; uint8_t hostname[0]; -} sctp_hostname_param_t; +}; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { -- cgit From e925d506f1a21f7fd24a8fdd3e73e0810c655de4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:53 +0800 Subject: sctp: remove the typedef sctp_supported_addrs_param_t This patch is to remove the typedef sctp_supported_addrs_param_t, and replace with struct sctp_supported_addrs_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c43e9067d41a..3ca3ab7302a6 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -297,10 +297,10 @@ struct sctp_hostname_param { }; /* Section 3.3.2.1 Supported Address Types (12) */ -typedef struct sctp_supported_addrs_param { +struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; __be16 types[0]; -} sctp_supported_addrs_param_t; +}; /* Appendix A. ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { -- cgit From c1dd5df39be5a98c843b9352c22c5569f84bec44 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:54 +0800 Subject: sctp: remove struct sctp_ecn_capable_param Remove it, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 3ca3ab7302a6..75524829aa81 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -302,11 +302,6 @@ struct sctp_supported_addrs_param { __be16 types[0]; }; -/* Appendix A. ECN Capable (32768) */ -typedef struct sctp_ecn_capable_param { - struct sctp_paramhdr param_hdr; -} sctp_ecn_capable_param_t; - /* ADDIP Section 3.2.6 Adaptation Layer Indication */ typedef struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; -- cgit From 85f6bd24ac579ef0926eb4c564ba1f3c8a7f8563 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:55 +0800 Subject: sctp: remove the typedef sctp_adaptation_ind_param_t This patch is to remove the typedef sctp_adaptation_ind_param_t, and replace with struct sctp_adaptation_ind_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 75524829aa81..72b87874ea76 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -303,10 +303,10 @@ struct sctp_supported_addrs_param { }; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ -typedef struct sctp_adaptation_ind_param { +struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} sctp_adaptation_ind_param_t; +}; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ typedef struct sctp_supported_ext_param { -- cgit From 15328d9feede450d64ff77cac5d25bc734ec8b27 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:56 +0800 Subject: sctp: remove the typedef sctp_supported_ext_param_t This patch is to remove the typedef sctp_supported_ext_param_t, and replace with struct sctp_supported_ext_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 72b87874ea76..76245685f923 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -309,10 +309,10 @@ struct sctp_adaptation_ind_param { }; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ -typedef struct sctp_supported_ext_param { +struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_supported_ext_param_t; +}; /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { -- cgit From b02db702face3791889a4fcf06691c086648ee89 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:57 +0800 Subject: sctp: remove the typedef sctp_random_param_t This patch is to remove the typedef sctp_random_param_t, and replace with struct sctp_random_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 76245685f923..9b1aa3907c9e 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -315,10 +315,10 @@ struct sctp_supported_ext_param { }; /* AUTH Section 3.1 Random */ -typedef struct sctp_random_param { +struct sctp_random_param { struct sctp_paramhdr param_hdr; __u8 random_val[0]; -} sctp_random_param_t; +}; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5ab29af8ca8a..f22c079fd1f1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1556,7 +1556,7 @@ struct sctp_association { * and authenticated chunk list. All that is part of the * cookie and these are just pointers to those locations */ - sctp_random_param_t *peer_random; + struct sctp_random_param *peer_random; sctp_chunks_param_t *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit From a762a9d94d44980e3690f9de87b918376daa6428 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:58 +0800 Subject: sctp: remove the typedef sctp_chunks_param_t This patch is to remove the typedef sctp_chunks_param_t, and replace with struct sctp_chunks_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 9b1aa3907c9e..b52def9bcfa1 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -321,10 +321,10 @@ struct sctp_random_param { }; /* AUTH Section 3.2 Chunk List */ -typedef struct sctp_chunks_param { +struct sctp_chunks_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} sctp_chunks_param_t; +}; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f22c079fd1f1..8042e6380b0e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1557,7 +1557,7 @@ struct sctp_association { * cookie and these are just pointers to those locations */ struct sctp_random_param *peer_random; - sctp_chunks_param_t *peer_chunks; + struct sctp_chunks_param *peer_chunks; sctp_hmac_algo_param_t *peer_hmacs; } peer; -- cgit From 1474774a7f0daf9878fd9537a24714f419e744ed Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 17 Jul 2017 11:29:59 +0800 Subject: sctp: remove the typedef sctp_hmac_algo_param_t This patch is to remove the typedef sctp_hmac_algo_param_t, and replace with struct sctp_hmac_algo_param in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- include/net/sctp/structs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b52def9bcfa1..913474dfc96c 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -327,10 +327,10 @@ struct sctp_chunks_param { }; /* AUTH Section 3.3 HMAC Algorithm */ -typedef struct sctp_hmac_algo_param { +struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; __be16 hmac_ids[0]; -} sctp_hmac_algo_param_t; +}; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8042e6380b0e..66cd7639b912 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1558,7 +1558,7 @@ struct sctp_association { */ struct sctp_random_param *peer_random; struct sctp_chunks_param *peer_chunks; - sctp_hmac_algo_param_t *peer_hmacs; + struct sctp_hmac_algo_param *peer_hmacs; } peer; /* State : A state variable indicating what state the -- cgit From 94e92e7ac90d06e1e839e112d3ae80b2457dbdd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Jul 2017 08:45:34 +0100 Subject: vfs: Add sb_rdonly(sb) to query the MS_RDONLY flag on s_flags Add an sb_rdonly() function to query the MS_RDONLY flag on sb->s_flags preparatory to providing an SB_RDONLY flag. Signed-off-by: David Howells --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b5d6816542b..6ae137c1bdf6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1844,7 +1844,8 @@ struct super_operations { */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) -#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY) +static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; } +#define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ -- cgit From e462ec50cb5fad19f6003a3d8087f4a0945dd2b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Jul 2017 08:45:35 +0100 Subject: VFS: Differentiate mount flags (MS_*) from internal superblock flags Differentiate the MS_* flags passed to mount(2) from the internal flags set in the super_block's s_flags. s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. In this patch, just the headers are altered and some kernel code where blind automated conversion isn't necessarily correct. Note that this shows up some interesting issues: (1) Some MS_* flags get translated to MNT_* flags (such as MS_NODEV -> MNT_NODEV) without passing this on to the filesystem, but some filesystems set such flags anyway. (2) The ->remount_fs() methods of some filesystems adjust the *flags argument by setting MS_* flags in it, such as MS_NOATIME - but these flags are then scrubbed by do_remount_sb() (only the occupants of MS_RMT_MASK are permitted: MS_RDONLY, MS_SYNCHRONOUS, MS_MANDLOCK, MS_I_VERSION and MS_LAZYTIME) I'm not sure what's the best way to solve all these cases. Suggested-by: Al Viro Signed-off-by: David Howells --- include/linux/fs.h | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ae137c1bdf6..3d6ee0c0ebb0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1269,6 +1269,33 @@ extern int send_sigurg(struct fown_struct *fown); struct mm_struct; +/* + * sb->s_flags. Note that these mirror the equivalent MS_* flags where + * represented in both. + */ +#define SB_RDONLY 1 /* Mount read-only */ +#define SB_NOSUID 2 /* Ignore suid and sgid bits */ +#define SB_NODEV 4 /* Disallow access to device special files */ +#define SB_NOEXEC 8 /* Disallow program execution */ +#define SB_SYNCHRONOUS 16 /* Writes are synced at once */ +#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define SB_DIRSYNC 128 /* Directory modifications are synchronous */ +#define SB_NOATIME 1024 /* Do not update access times. */ +#define SB_NODIRATIME 2048 /* Do not update directory access times */ +#define SB_SILENT 32768 +#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define SB_I_VERSION (1<<23) /* Update inode I_version field */ +#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define SB_SUBMOUNT (1<<26) +#define SB_NOREMOTELOCK (1<<27) +#define SB_NOSEC (1<<28) +#define SB_BORN (1<<29) +#define SB_ACTIVE (1<<30) +#define SB_NOUSER (1<<31) + /* * Umount options */ @@ -1835,7 +1862,7 @@ struct super_operations { * possible to override it selectively if you really wanted to with some * ioctl() that is not currently implemented. * - * Exception: MS_RDONLY is always applied to the entire file system. + * Exception: SB_RDONLY is always applied to the entire file system. * * Unfortunately, it is possible to change a filesystems flags with it mounted * with files in use. This means that all of the inodes will not have their @@ -1846,18 +1873,18 @@ struct super_operations { static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) -#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \ +#define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) -#define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ +#define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) -#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) -#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) -#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) +#define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK) +#define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY|SB_NOATIME) +#define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) -#define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) +#define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) @@ -2178,7 +2205,7 @@ static inline int __mandatory_lock(struct inode *ino) } /* - * ... and these candidates should be on MS_MANDLOCK mounted fs, + * ... and these candidates should be on SB_MANDLOCK mounted fs, * otherwise these will be advisory locks */ @@ -3274,7 +3301,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) static inline void inode_has_no_xattr(struct inode *inode) { - if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) + if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC)) inode->i_flags |= S_NOSEC; } -- cgit From c698316661096e036b54448039b35e1c2c5809f0 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Mon, 17 Jul 2017 11:41:39 +0200 Subject: extcon: cros-ec: Add extcon-cros-ec driver to support display out This is the driver for the USB Type C cable detection mechanism built into the ChromeOS Embedded Controller on systems that have USB Type-C ports. At present, this allows for the presence of display out, but in future, it may also be used to notify host and device type cables and the presence of power. Signed-off-by: Benson Leung Signed-off-by: Enric Balletbo i Serra Acked-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/cros_ec_commands.h | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 190c8f4afa02..2b16e95b9bb8 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -285,6 +285,11 @@ enum host_event_code { EC_HOST_EVENT_HANG_DETECT = 20, /* Hang detect logic detected a hang and warm rebooted the AP */ EC_HOST_EVENT_HANG_REBOOT = 21, + /* PD MCU triggering host event */ + EC_HOST_EVENT_PD_MCU = 22, + + /* EC desires to change state of host-controlled USB mux */ + EC_HOST_EVENT_USB_MUX = 28, /* * The high bit of the event mask is not used as a host event code. If @@ -2905,6 +2910,76 @@ struct ec_params_usb_pd_control { uint8_t mux; } __packed; +#define PD_CTRL_RESP_ENABLED_COMMS (1 << 0) /* Communication enabled */ +#define PD_CTRL_RESP_ENABLED_CONNECTED (1 << 1) /* Device connected */ +#define PD_CTRL_RESP_ENABLED_PD_CAPABLE (1 << 2) /* Partner is PD capable */ + +struct ec_response_usb_pd_control_v1 { + uint8_t enabled; + uint8_t role; + uint8_t polarity; + char state[32]; +} __packed; + +#define EC_CMD_USB_PD_PORTS 0x102 + +struct ec_response_usb_pd_ports { + uint8_t num_ports; +} __packed; + +#define EC_CMD_USB_PD_POWER_INFO 0x103 + +#define PD_POWER_CHARGING_PORT 0xff +struct ec_params_usb_pd_power_info { + uint8_t port; +} __packed; + +enum usb_chg_type { + USB_CHG_TYPE_NONE, + USB_CHG_TYPE_PD, + USB_CHG_TYPE_C, + USB_CHG_TYPE_PROPRIETARY, + USB_CHG_TYPE_BC12_DCP, + USB_CHG_TYPE_BC12_CDP, + USB_CHG_TYPE_BC12_SDP, + USB_CHG_TYPE_OTHER, + USB_CHG_TYPE_VBUS, + USB_CHG_TYPE_UNKNOWN, +}; + +struct usb_chg_measures { + uint16_t voltage_max; + uint16_t voltage_now; + uint16_t current_max; + uint16_t current_lim; +} __packed; + +struct ec_response_usb_pd_power_info { + uint8_t role; + uint8_t type; + uint8_t dualrole; + uint8_t reserved1; + struct usb_chg_measures meas; + uint32_t max_power; +} __packed; + +/* Get info about USB-C SS muxes */ +#define EC_CMD_USB_PD_MUX_INFO 0x11a + +struct ec_params_usb_pd_mux_info { + uint8_t port; /* USB-C port number */ +} __packed; + +/* Flags representing mux state */ +#define USB_PD_MUX_USB_ENABLED (1 << 0) +#define USB_PD_MUX_DP_ENABLED (1 << 1) +#define USB_PD_MUX_POLARITY_INVERTED (1 << 2) +#define USB_PD_MUX_HPD_IRQ (1 << 3) + +struct ec_response_usb_pd_mux_info { + uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */ +} __packed; + /*****************************************************************************/ /* * Passthru commands -- cgit From 450f0f6a8fb499a6ef3d10ab5c29a7c4650a9958 Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Mon, 10 Jul 2017 11:14:56 +0800 Subject: ASoC: rt5663: Add the manual offset field to compensate the DC offset The patch adds the manual offset field in the devicetree to compensate the DC offset that will be different between the PCB layout. It only can be measured by the real production. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- include/sound/rt5663.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/sound/rt5663.h (limited to 'include') diff --git a/include/sound/rt5663.h b/include/sound/rt5663.h new file mode 100644 index 000000000000..be6f1b2d8fbd --- /dev/null +++ b/include/sound/rt5663.h @@ -0,0 +1,20 @@ +/* + * linux/sound/rt5663.h -- Platform data for RT5663 + * + * Copyright 2017 Realtek Semiconductor Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_SND_RT5663_H +#define __LINUX_SND_RT5663_H + +struct rt5663_platform_data { + unsigned int dc_offset_l_manual; + unsigned int dc_offset_r_manual; +}; + +#endif + -- cgit From c54182ec0e157988f0cafd1e8d37b68ab4210f87 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 29 Jun 2017 12:00:05 +0200 Subject: EDAC: Get rid of mci->mod_ver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is a write-only variable so get rid of it. Signed-off-by: Borislav Petkov Acked-by: Robert Richter Acked-by: Michal Simek Acked-by: Thor Thayer Acked-by: Tony Luck Cc: Mark Gross Cc: Tim Small Cc: Ranganathan Desikan Cc: "Arvind R." Cc: Jason Baron Cc: "Sören Brinkmann" Cc: Ralf Baechle Cc: David Daney Cc: Loc Ho Cc: linux-edac@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@linux-mips.org --- include/linux/edac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/edac.h b/include/linux/edac.h index 8ae0f45fafd6..cd75c173fd00 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -619,7 +619,6 @@ struct mem_ctl_info { */ struct device *pdev; const char *mod_name; - const char *mod_ver; const char *ctl_name; const char *dev_name; void *pvt_info; -- cgit From a5d31a3f81c6fb13b381951bf6163444c0257e8b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 15 Jun 2017 14:05:20 -0600 Subject: char_dev: extend dynamic allocation of majors into a higher range We've run into problems with running out of dynamicly assign char device majors particullarly on automated test systems with all-yes-configs. Roughly 40 dynamic assignments can be made with such kernels at this time while space is reserved for only 20. Currently, the kernel only prints a warning when dynamic allocation overflows the reserved region. And when this happens drivers that have fixed assignments can randomly fail depending on the order of initialization of other drivers. Thus, adding a new char device can cause unexpected failures in completely unrelated parts of the kernel. This patch solves the problem by extending dynamic major number allocations down from 511 once the 234-254 region fills up. Fixed majors already exist above 255 so the infrastructure to support high number majors is already in place. The patch reserves an additional 128 major numbers which should hopefully last us a while. Kernels that don't require more than 20 dynamic majors assigned (which is pretty typical) should not be affected by this change. Signed-off-by: Logan Gunthorpe Cc: Linus Torvalds Cc: Alan Cox Cc: Arnd Bergmann Cc: Linus Walleij Link: https://lkml.org/lkml/2017/6/4/107 Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b5d6816542b..1773dcf1e318 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2473,6 +2473,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, #define CHRDEV_MAJOR_HASH_SIZE 255 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 +/* Marks the top and bottom of the second segment of free char majors */ +#define CHRDEV_MAJOR_DYN_EXT_START 511 +#define CHRDEV_MAJOR_DYN_EXT_END 384 + extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, -- cgit From 8a932f73e5b4227bf787474b44dc70b6961d6246 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 15 Jun 2017 14:05:21 -0600 Subject: char_dev: order /proc/devices by major number Presently, the order of the char devices listed in /proc/devices is not entirely sequential. If a char device has a major number greater than CHRDEV_MAJOR_HASH_SIZE (255), it will be ordered as if its major were module 255. For example, 511 appears after 1. This patch cleans that up and prints each major number in the correct order, regardless of where they are stored in the hash table. In order to do this, we introduce CHRDEV_MAJOR_MAX as an artificial limit (chosen to be 511). It will then print all devices in major order number from 0 to the maximum. Signed-off-by: Logan Gunthorpe Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Arnd Bergmann Cc: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1773dcf1e318..b07433c335ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2470,7 +2470,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, #endif /* fs/char_dev.c */ -#define CHRDEV_MAJOR_HASH_SIZE 255 +#define CHRDEV_MAJOR_MAX 512 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 /* Marks the top and bottom of the second segment of free char majors */ -- cgit From 133d55cdb2f1f9e258d6dc34594a6c565f10b3fd Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 16 Jun 2017 17:48:21 -0600 Subject: block: order /proc/devices by major number Presently, the order of the block devices listed in /proc/devices is not entirely sequential. If a block device has a major number greater than BLKDEV_MAJOR_HASH_SIZE (255), it will be ordered as if its major were module 255. For example, 511 appears after 1. This patch cleans that up and prints each major number in the correct order, regardless of where they are stored in the hash table. In order to do this, we introduce BLKDEV_MAJOR_MAX as an artificial limit (chosen to be 512). It will then print all devices in major order number from 0 to the maximum. Signed-off-by: Logan Gunthorpe Cc: Jens Axboe Cc: Jeff Layton Cc: "J. Bruce Fields" Signed-off-by: Greg Kroah-Hartman --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b07433c335ca..570dcc61fda6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2503,14 +2503,14 @@ static inline void unregister_chrdev(unsigned int major, const char *name) #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_HASH_SIZE 255 +#define BLKDEV_MAJOR_MAX 512 extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern void blkdev_show(struct seq_file *,off_t); #else -#define BLKDEV_MAJOR_HASH_SIZE 0 +#define BLKDEV_MAJOR_MAX 0 #endif extern void init_special_inode(struct inode *, umode_t, dev_t); -- cgit From c2a737eb2ea5682ffe63bc08003965496d6dc088 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Jun 2017 09:39:02 +0530 Subject: debugfs: Add dummy implementation of few helpers This adds (missing) dummy implementations of debugfs_create_file_unsafe() and debugfs_create_ulong(). Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index aa86e6d8c1aa..b93efc8feecd 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -196,6 +196,14 @@ static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_file_unsafe(const char *name, + umode_t mode, struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file_size(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops, @@ -289,6 +297,14 @@ static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_ulong(const char *name, + umode_t mode, + struct dentry *parent, + unsigned long *value) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value) -- cgit From 95c40f41cfaf34e1c07812e93aa4b3263f9953f3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 25 Jun 2017 12:30:25 -0700 Subject: vmbus: drop unused ring_buffer_info elements The elements ring_data_start_offset and priv_write_index are not used. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b7d7bbec74e0..5e5f966bf37f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -124,8 +124,6 @@ struct hv_ring_buffer_info { spinlock_t ring_lock; u32 ring_datasize; /* < ring_size */ - u32 ring_data_startoffset; - u32 priv_write_index; u32 priv_read_index; u32 cached_read_index; }; -- cgit From 8dd45f2ab005a1f3301296059b23b03ec3dbf79b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 25 Jun 2017 12:30:26 -0700 Subject: vmbus: refactor hv_signal_on_read The function hv_signal_on_read was defined in hyperv.h and only used in one place in ring_buffer code. Clearer to just move it inline there. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 49 ------------------------------------------------- 1 file changed, 49 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5e5f966bf37f..308e1f9706bb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1471,55 +1471,6 @@ hv_get_ring_buffer(const struct hv_ring_buffer_info *ring_info) return ring_info->ring_buffer->buffer; } -/* - * To optimize the flow management on the send-side, - * when the sender is blocked because of lack of - * sufficient space in the ring buffer, potential the - * consumer of the ring buffer can signal the producer. - * This is controlled by the following parameters: - * - * 1. pending_send_sz: This is the size in bytes that the - * producer is trying to send. - * 2. The feature bit feat_pending_send_sz set to indicate if - * the consumer of the ring will signal when the ring - * state transitions from being full to a state where - * there is room for the producer to send the pending packet. - */ - -static inline void hv_signal_on_read(struct vmbus_channel *channel) -{ - u32 cur_write_sz, cached_write_sz; - u32 pending_sz; - struct hv_ring_buffer_info *rbi = &channel->inbound; - - /* - * Issue a full memory barrier before making the signaling decision. - * Here is the reason for having this barrier: - * If the reading of the pend_sz (in this function) - * were to be reordered and read before we commit the new read - * index (in the calling function) we could - * have a problem. If the host were to set the pending_sz after we - * have sampled pending_sz and go to sleep before we commit the - * read index, we could miss sending the interrupt. Issue a full - * memory barrier to address this. - */ - virt_mb(); - - pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); - /* If the other end is not blocked on write don't bother. */ - if (pending_sz == 0) - return; - - cur_write_sz = hv_get_bytes_to_write(rbi); - - if (cur_write_sz < pending_sz) - return; - - cached_write_sz = hv_get_cached_bytes_to_write(rbi); - if (cached_write_sz < pending_sz) - vmbus_setevent(channel); -} - /* * Mask off host interrupt callback notifications */ -- cgit From 05d00bc94ac27d220d8a78e365d7fa3a26dcca17 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 25 Jun 2017 12:30:27 -0700 Subject: vmbus: eliminate duplicate cached index Don't need cached read index anymore now that packet iterator is used. The iterator has the original read index until the visible read_index is updated. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 308e1f9706bb..27db4e650b8c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -125,7 +125,6 @@ struct hv_ring_buffer_info { u32 ring_datasize; /* < ring_size */ u32 priv_read_index; - u32 cached_read_index; }; /* @@ -178,19 +177,6 @@ static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi) return write; } -static inline u32 hv_get_cached_bytes_to_write( - const struct hv_ring_buffer_info *rbi) -{ - u32 read_loc, write_loc, dsize, write; - - dsize = rbi->ring_datasize; - read_loc = rbi->cached_read_index; - write_loc = rbi->ring_buffer->write_index; - - write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : - read_loc - write_loc; - return write; -} /* * VMBUS version is 32 bit entity broken up into * two 16 bit quantities: major_number. minor_number. -- cgit From b9045b9c6b79d2ea5b888a0f9e9aa156d09ba8a9 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 12 Jul 2017 13:37:00 -0500 Subject: ASoC: tlv320aic32x4: Add gpio configuration to the codec Add the ability to configure the MFP1->MFP5 registers as GPIOs. In addition adding ALSA controls to get and set the GPIO state. Per the data sheet each MFP can be configured as a GPIO input only, output only or either an input or output. Signed-off-by: Dan Murphy Signed-off-by: Mark Brown --- include/sound/tlv320aic32x4.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/sound/tlv320aic32x4.h b/include/sound/tlv320aic32x4.h index 24e5d991f148..22305c0ab31a 100644 --- a/include/sound/tlv320aic32x4.h +++ b/include/sound/tlv320aic32x4.h @@ -22,7 +22,30 @@ #define AIC32X4_MICPGA_ROUTE_LMIC_IN2R_10K 0x00000001 #define AIC32X4_MICPGA_ROUTE_RMIC_IN1L_10K 0x00000002 +/* GPIO API */ +#define AIC32X4_MFPX_DEFAULT_VALUE 0xff + +#define AIC32X4_MFP1_DIN_DISABLED 0 +#define AIC32X4_MFP1_DIN_ENABLED 0x2 +#define AIC32X4_MFP1_GPIO_IN 0x4 + +#define AIC32X4_MFP2_GPIO_OUT_LOW 0x0 +#define AIC32X4_MFP2_GPIO_OUT_HIGH 0x1 + +#define AIC32X4_MFP_GPIO_ENABLED 0x4 + +#define AIC32X4_MFP5_GPIO_DISABLED 0x0 +#define AIC32X4_MFP5_GPIO_INPUT 0x8 +#define AIC32X4_MFP5_GPIO_OUTPUT 0xc +#define AIC32X4_MFP5_GPIO_OUT_LOW 0x0 +#define AIC32X4_MFP5_GPIO_OUT_HIGH 0x1 + +struct aic32x4_setup_data { + unsigned int gpio_func[5]; +}; + struct aic32x4_pdata { + struct aic32x4_setup_data *setup; u32 power_cfg; u32 micpga_routing; bool swapdacs; -- cgit From 68f6be656179d1f0399d77918b3e361dad420455 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 14 Jun 2017 10:36:27 -0500 Subject: fpga: Add flag to indicate SPI bitstream is bit-reversed Add a flag that is passed to the write_init() callback, indicating that the SPI bitstream starts with LSB first. SPI controllers usually send data with MSB first. If an FPGA expects bitstream data as LSB first, the data must be reversed either by the SPI controller or by the driver. Alternatively the bitstream could be prepared as bit-reversed to avoid the bit-swapping while sending. This flag indicates such bit-reversed SPI bitstream. The low-level driver will deal with the flag and perform bit-reversing if needed. Signed-off-by: Anatolij Gustschin Signed-off-by: Joshua Clayton Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index b4ac24c4411d..01c348ca38b7 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -67,10 +67,12 @@ enum fpga_mgr_states { * FPGA Manager flags * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting + * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) #define FPGA_MGR_ENCRYPTED_BITSTREAM BIT(2) +#define FPGA_MGR_BITSTREAM_LSB_FIRST BIT(3) /** * struct fpga_image_info - information specific to a FPGA image -- cgit From 3b88da4aba2549497ac8868877562f1b1913ca62 Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Wed, 14 Jun 2017 10:36:31 -0500 Subject: lib: add bitrev8x4() Add a function to reverse bytes within a 32 bit word. Operate on a u32 rather than individual bytes. Signed-off-by: Joshua Clayton Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/bitrev.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index fb790b8449c1..b97be27e5a85 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -29,6 +29,8 @@ static inline u32 __bitrev32(u32 x) #endif /* CONFIG_HAVE_ARCH_BITREVERSE */ +#define __bitrev8x4(x) (__bitrev32(swab32(x))) + #define __constant_bitrev32(x) \ ({ \ u32 __x = x; \ @@ -50,6 +52,15 @@ static inline u32 __bitrev32(u32 x) __x; \ }) +#define __constant_bitrev8x4(x) \ +({ \ + u32 __x = x; \ + __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ + __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ + __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ + __x; \ +}) + #define __constant_bitrev8(x) \ ({ \ u8 __x = x; \ @@ -75,6 +86,14 @@ static inline u32 __bitrev32(u32 x) __bitrev16(__x); \ }) +#define bitrev8x4(x) \ +({ \ + u32 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev8x4(__x) : \ + __bitrev8x4(__x); \ + }) + #define bitrev8(x) \ ({ \ u8 __x = x; \ -- cgit From b37fa56069ce61d97a77dadca8dd65e522db3387 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 14 Jun 2017 10:36:34 -0500 Subject: fpga: Add flag to indicate bitstream needs decompression Add a flag that is passed to the write_init() callback, indicating that the bitstream is compressed. The low-level driver will deal with the flag, or return an error, if compressed bitstreams are not supported. Signed-off-by: Anatolij Gustschin Reviewed-by: Andy Shevchenko Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 01c348ca38b7..bfa14bc023fb 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -68,11 +68,13 @@ enum fpga_mgr_states { * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first + * FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) #define FPGA_MGR_ENCRYPTED_BITSTREAM BIT(2) #define FPGA_MGR_BITSTREAM_LSB_FIRST BIT(3) +#define FPGA_MGR_COMPRESSED_BITSTREAM BIT(4) /** * struct fpga_image_info - information specific to a FPGA image -- cgit From 805df2966f67a6b1a228c8e580e230b6c849b41e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 23 Jun 2017 14:55:32 +0530 Subject: arch_topology: Change return type of topology_parse_cpu_capacity() to bool topology_parse_cpu_capacity() returns 1 on success and 0 on errors. Make it return bool instead of int as that suits the purpose better. Signed-off-by: Viresh Kumar Reviewed-by: Juri Lelli Tested-by: Juri Lelli Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 9af3c174c03a..716ce587247e 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -4,10 +4,12 @@ #ifndef _LINUX_ARCH_TOPOLOGY_H_ #define _LINUX_ARCH_TOPOLOGY_H_ +#include + void topology_normalize_cpu_scale(void); struct device_node; -int topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); +bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); struct sched_domain; unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu); -- cgit From 27eac47b00789522ba00501b0838026e1ecb6f05 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 17 Jul 2017 11:35:54 +0200 Subject: net/unix: drop obsolete fd-recursion limits All unix sockets now account inflight FDs to the respective sender. This was introduced in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets and further refined in: commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 Author: Hannes Frederic Sowa Date: Wed Feb 3 02:11:03 2016 +0100 unix: correctly track in-flight fds in sending process user_struct Hence, regardless of the stacking depth of FDs, the total number of inflight FDs is limited, and accounted. There is no known way for a local user to exceed those limits or exploit the accounting. Furthermore, the GC logic is independent of the recursion/stacking depth as well. It solely depends on the total number of inflight FDs, regardless of their layout. Lastly, the current `recursion_level' suffers a TOCTOU race, since it checks and inherits depths only at queue time. If we consider `A<-B' to mean `queue-B-on-A', the following sequence circumvents the recursion level easily: A<-B B<-C C<-D ... Y<-Z resulting in: A<-B<-C<-...<-Z With all of this in mind, lets drop the recursion limit. It has no additional security value, anymore. On the contrary, it randomly confuses message brokers that try to forward file-descriptors, since any sendmsg(2) call can fail spuriously with ETOOMANYREFS if a client maliciously modifies the FD while inflight. Cc: Alban Crequy Cc: Simon McVittie Signed-off-by: David Herrmann Reviewed-by: Tom Gundersen Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 678e4d6fa317..3b3194b2fc65 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -58,7 +58,6 @@ struct unix_sock { struct list_head link; atomic_long_t inflight; spinlock_t lock; - unsigned char recursion_level; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 -- cgit From b145425f269a17ed344d737f746b844dfac60c82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Jul 2017 02:56:10 -0700 Subject: inetpeer: remove AVL implementation in favor of RB tree As discussed in Faro during Netfilter Workshop 2017, RB trees can be used with RCU, using a seqlock. Note that net/rxrpc/conn_service.c is already using this. This patch converts inetpeer from AVL tree to RB tree, since it allows to remove private AVL implementation in favor of shared RB code. $ size net/ipv4/inetpeer.before net/ipv4/inetpeer.after text data bss dec hex filename 3195 40 128 3363 d23 net/ipv4/inetpeer.before 1562 24 0 1586 632 net/ipv4/inetpeer.after The same technique can be used to speed up net/netfilter/nft_set_rbtree.c (removing rwlock contention in fast path) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f2a215fc78e4..950ed182f62f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -33,18 +33,12 @@ struct inetpeer_addr { }; struct inet_peer { - /* group together avl_left,avl_right,v4daddr to speedup lookups */ - struct inet_peer __rcu *avl_left, *avl_right; + struct rb_node rb_node; struct inetpeer_addr daddr; - __u32 avl_height; u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - union { - struct list_head gc_list; - struct rcu_head gc_rcu; - }; /* * Once inet_peer is queued for deletion (refcnt == 0), following field * is not available: rid @@ -55,7 +49,6 @@ struct inet_peer { atomic_t rid; /* Frag reception counter */ }; struct rcu_head rcu; - struct inet_peer *gc_next; }; /* following fields might be frequently dirtied */ @@ -64,7 +57,7 @@ struct inet_peer { }; struct inet_peer_base { - struct inet_peer __rcu *root; + struct rb_root rb_root; seqlock_t lock; int total; }; -- cgit From 814abfabef3ceed390c10d06a0cc69a86454b6cf Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:27:07 -0700 Subject: xdp: add bpf_redirect helper function This adds support for a bpf_redirect helper function to the XDP infrastructure. For now this only supports redirecting to the egress path of a port. In order to support drivers handling a xdp_buff natively this patches uses a new ndo operation ndo_xdp_xmit() that takes pushes a xdp_buff to the specified device. If the program specifies either (a) an unknown device or (b) a device that does not support the operation a BPF warning is thrown and the XDP_ABORTED error code is returned. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ++++ include/linux/netdevice.h | 6 ++++++ include/uapi/linux/bpf.h | 1 + 3 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index bfef1e5734f8..64cae7a08148 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -711,7 +711,11 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); + void bpf_warn_invalid_xdp_action(u32 act); +void bpf_warn_invalid_xdp_redirect(u32 ifindex); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..77f5376005e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -66,6 +66,7 @@ struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; struct bpf_prog; +struct xdp_buff; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -1138,6 +1139,9 @@ struct xfrmdev_ops { * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); * This function is used to set or query state related to XDP on the * netdevice. See definition of enum xdp_netdev_command for details. + * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + * This function is used to submit a XDP packet for transmit on a + * netdevice. * */ struct net_device_ops { @@ -1323,6 +1327,8 @@ struct net_device_ops { int needed_headroom); int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + int (*ndo_xdp_xmit)(struct net_device *dev, + struct xdp_buff *xdp); }; /** diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e99e3e6f8b37..4dbb7a3f4677 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -717,6 +717,7 @@ enum xdp_action { XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook -- cgit From 6103aa96ec077c976e851e0b89cc2446cb76573d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:27:50 -0700 Subject: net: implement XDP_REDIRECT for xdp generic Add support for redirect to xdp generic creating a fall back for devices that do not yet have support and allowing test infrastructure using veth pairs to be built. Signed-off-by: John Fastabend Tested-by: Andy Gospodarek Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 64cae7a08148..10df7daf5ec6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -712,6 +712,7 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); void bpf_warn_invalid_xdp_action(u32 act); -- cgit From 5acaee0a8964c9bab7775ab8bedcd1f66a2a1011 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:28:35 -0700 Subject: xdp: add trace event for xdp redirect This adds a trace event for xdp redirect which may help when debugging XDP programs that use redirect bpf commands. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 4 +++- include/trace/events/xdp.h | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 10df7daf5ec6..ce8211fa91c7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -713,7 +713,9 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp); +int xdp_do_redirect(struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *prog); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1b61357d3f57..7b1eb7b4be41 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -12,7 +12,8 @@ FN(ABORTED) \ FN(DROP) \ FN(PASS) \ - FN(TX) + FN(TX) \ + FN(REDIRECT) #define __XDP_ACT_TP_FN(x) \ TRACE_DEFINE_ENUM(XDP_##x); @@ -48,6 +49,34 @@ TRACE_EVENT(xdp_exception, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) ); +TRACE_EVENT(xdp_redirect, + + TP_PROTO(const struct net_device *from, + const struct net_device *to, + const struct bpf_prog *xdp, u32 act), + + TP_ARGS(from, to, xdp, act), + + TP_STRUCT__entry( + __string(name_from, from->name) + __string(name_to, to->name) + __array(u8, prog_tag, 8) + __field(u32, act) + ), + + TP_fast_assign( + BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); + memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __assign_str(name_from, from->name); + __assign_str(name_to, to->name); + __entry->act = act; + ), + + TP_printk("prog=%s from=%s to=%s action=%s", + __print_hex_str(__entry->prog_tag, 8), + __get_str(name_from), __get_str(name_to), + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) +); #endif /* _TRACE_XDP_H */ #include -- cgit From 546ac1ffb70d25b56c1126940e5ec639c4dd7413 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:28:56 -0700 Subject: bpf: add devmap, a map for storing net device references Device map (devmap) is a BPF map, primarily useful for networking applications, that uses a key to lookup a reference to a netdevice. The map provides a clean way for BPF programs to build virtual port to physical port maps. Additionally, it provides a scoping function for the redirect action itself allowing multiple optimizations. Future patches will leverage the map to provide batching at the XDP layer. Another optimization/feature, that is not yet implemented, would be to support multiple netdevices per key to support efficient multicast and broadcast support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 3 +++ include/uapi/linux/bpf.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3d137c33d664..b1e1035ca24b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -35,3 +35,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4dbb7a3f4677..ecbb0e7e15bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -104,6 +104,7 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, }; enum bpf_prog_type { -- cgit From 97f91a7cf04ff605845c20948b8a80e54cbd3376 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:29:18 -0700 Subject: bpf: add bpf_redirect_map helper routine BPF programs can use the devmap with a bpf_redirect_map() helper routine to forward packets to netdevice in map. Signed-off-by: John Fastabend Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b69e7a5869ff..d0d3281ac678 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -379,4 +379,7 @@ extern const struct bpf_func_proto bpf_get_stackid_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); + #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ecbb0e7e15bc..1106a8c4cd36 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -348,6 +348,11 @@ union bpf_attr { * @flags: bit 0 - if set, redirect to ingress instead of egress * other bits - reserved * Return: TC_ACT_REDIRECT + * int bpf_redirect_map(key, map, flags) + * redirect to endpoint in map + * @key: index in map to lookup + * @map: fd of map to do lookup in + * @flags: -- * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -592,7 +597,8 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit From 11393cc9b9be2a1f61559e6fb9c27bc8fa20b1ff Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:29:40 -0700 Subject: xdp: Add batching support to redirect map For performance reasons we want to avoid updating the tail pointer in the driver tx ring as much as possible. To accomplish this we add batching support to the redirect path in XDP. This adds another ndo op "xdp_flush" that is used to inform the driver that it should bump the tail pointer on the TX ring. Signed-off-by: John Fastabend Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ include/linux/filter.h | 7 +++++++ include/linux/netdevice.h | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d0d3281ac678..6850a760dc94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -381,5 +381,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index ce8211fa91c7..3323ee91c172 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -712,10 +712,17 @@ bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the + * same cpu context. Further for best results no more than a single map + * for the do_redirect/do_flush pair should be used. This limitation is + * because we only track one map and force a flush when the map changes. + * This does not appear to be a real limiation for existing software. + */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); +void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 77f5376005e6..03b104908235 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1142,7 +1142,9 @@ struct xfrmdev_ops { * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. - * + * void (*ndo_xdp_flush)(struct net_device *dev); + * This function is used to inform the driver to flush a paticular + * xpd tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1329,6 +1331,7 @@ struct net_device_ops { struct netdev_xdp *xdp); int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); + void (*ndo_xdp_flush)(struct net_device *dev); }; /** -- cgit From 2ddf71e23cc246e95af72a6deed67b4a50a7b81c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 09:30:02 -0700 Subject: net: add notifier hooks for devmap bpf map The BPF map devmap holds a refcnt on the net_device structure when it is in the map. We need to do this to ensure on driver unload we don't lose a dev reference. However, its not very convenient to have to manually unload the map when destroying a net device so add notifier handlers to do the cleanup automatically. But this creates a race between update/destroy BPF syscall and programs and the unregister netdev hook. Unfortunately, the best I could come up with is either to live with requiring manual removal of net devices from the map before removing the net device OR to add a mutex in devmap to ensure the map is not modified while we are removing a device. The fallout also requires that BPF programs no longer update/delete the map from the BPF program side because the mutex may sleep and this can not be done from inside an rcu critical section. This is not a real problem though because I have not come up with any use cases where this is actually useful in practice. If/when we come up with a compelling user for this we may need to revisit this. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3323ee91c172..d19ed3c15e1e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -716,7 +716,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is * because we only track one map and force a flush when the map changes. - * This does not appear to be a real limiation for existing software. + * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); int xdp_do_redirect(struct net_device *dev, -- cgit From 880388aa3c07fdea4f9b85e35641753017b1852f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Jul 2017 07:29:12 -0700 Subject: net: Remove all references to SKB_GSO_UDP. Such packets are no longer possible. Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5209b5ed2a64..32fb046f2173 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; break; - case VIRTIO_NET_HDR_GSO_UDP: - gso_type = SKB_GSO_UDP; - break; default: return -EINVAL; } @@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - else if (sinfo->gso_type & SKB_GSO_UDP) - hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) -- cgit From d9d30adf56777c402c0027c0e6ae21f17cc0a365 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 3 Jul 2017 07:31:57 -0700 Subject: net: Kill NETIF_F_UFO and SKB_GSO_UDP. No longer used. Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 4 +--- include/linux/netdevice.h | 1 - include/linux/skbuff.h | 31 +++++++++++++++---------------- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1d4737cffc71..ebd273627334 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -36,7 +36,6 @@ enum { /**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */ NETIF_F_TSO_BIT /* ... TCPv4 segmentation */ = NETIF_F_GSO_SHIFT, - NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */ NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */ NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */ NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */ @@ -118,7 +117,6 @@ enum { #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) #define NETIF_F_TSO __NETIF_F(TSO) -#define NETIF_F_UFO __NETIF_F(UFO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) #define NETIF_F_RXALL __NETIF_F(RXALL) @@ -172,7 +170,7 @@ enum { NETIF_F_FSO) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ NETIF_F_GSO_SCTP) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 03b104908235..c60351b84323 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4098,7 +4098,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..4d7a284ba3ee 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,39 +463,38 @@ enum { enum { SKB_GSO_TCPV4 = 1 << 0, - SKB_GSO_UDP = 1 << 1, /* This indicates the skb is from an untrusted source. */ - SKB_GSO_DODGY = 1 << 2, + SKB_GSO_DODGY = 1 << 1, /* This indicates the tcp segment has CWR set. */ - SKB_GSO_TCP_ECN = 1 << 3, + SKB_GSO_TCP_ECN = 1 << 2, - SKB_GSO_TCP_FIXEDID = 1 << 4, + SKB_GSO_TCP_FIXEDID = 1 << 3, - SKB_GSO_TCPV6 = 1 << 5, + SKB_GSO_TCPV6 = 1 << 4, - SKB_GSO_FCOE = 1 << 6, + SKB_GSO_FCOE = 1 << 5, - SKB_GSO_GRE = 1 << 7, + SKB_GSO_GRE = 1 << 6, - SKB_GSO_GRE_CSUM = 1 << 8, + SKB_GSO_GRE_CSUM = 1 << 7, - SKB_GSO_IPXIP4 = 1 << 9, + SKB_GSO_IPXIP4 = 1 << 8, - SKB_GSO_IPXIP6 = 1 << 10, + SKB_GSO_IPXIP6 = 1 << 9, - SKB_GSO_UDP_TUNNEL = 1 << 11, + SKB_GSO_UDP_TUNNEL = 1 << 10, - SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12, + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_PARTIAL = 1 << 13, + SKB_GSO_PARTIAL = 1 << 12, - SKB_GSO_TUNNEL_REMCSUM = 1 << 14, + SKB_GSO_TUNNEL_REMCSUM = 1 << 13, - SKB_GSO_SCTP = 1 << 15, + SKB_GSO_SCTP = 1 << 14, - SKB_GSO_ESP = 1 << 16, + SKB_GSO_ESP = 1 << 15, }; #if BITS_PER_LONG > 32 -- cgit From 06dc75ab06943fcc126a951a0680980ad5cb75c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 18:56:54 +0200 Subject: net: Revert "net: add function to allocate sk_buff head without data area" It was added for netlink mmap tx, there are no callers in the tree. The commit also added a check for skb->head != NULL in kfree_skb path, remove that too -- all skbs ought to have skb->head set. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4d7a284ba3ee..4093552be1de 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -944,12 +944,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -struct sk_buff *__alloc_skb_head(gfp_t priority, int node); -static inline struct sk_buff *alloc_skb_head(gfp_t priority) -{ - return __alloc_skb_head(priority, -1); -} - struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); -- cgit From 46f55cffa47330b99537985a50d92945d4b34658 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jul 2017 21:56:48 -0700 Subject: net: fix build error in devmap helper calls Initial patches missed case with CONFIG_BPF_SYSCALL not set. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine") Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6850a760dc94..6353c7474dba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -318,6 +318,12 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + +/* Map specifics */ +struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +void __dev_map_insert_ctx(struct bpf_map *map, u32 index); +void __dev_map_flush(struct bpf_map *map); + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -356,6 +362,20 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } + +static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} + +static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __dev_map_flush(struct bpf_map *map) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ @@ -379,9 +399,4 @@ extern const struct bpf_func_proto bpf_get_stackid_proto; void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -/* Map specifics */ -struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_insert_ctx(struct bpf_map *map, u32 index); -void __dev_map_flush(struct bpf_map *map); - #endif /* _LINUX_BPF_H */ -- cgit From 3cf29931453215536916d0c4da953fce1911ced3 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 14 Jul 2017 19:38:36 +0900 Subject: LSM: Remove security_task_create() hook. Since commit a79be238600d1a03 ("selinux: Use task_alloc hook rather than task_create hook") changed to use task_alloc hook, task_create hook is no longer used. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 7 ------- include/linux/security.h | 6 ------ 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3cc9d77c7527..575703cb17b8 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -529,11 +529,6 @@ * * Security hooks for task operations. * - * @task_create: - * Check permission before creating a child process. See the clone(2) - * manual page for definitions of the @clone_flags. - * @clone_flags contains the flags indicating what should be shared. - * Return 0 if permission is granted. * @task_alloc: * @task task being allocated. * @clone_flags contains the flags indicating what should be shared. @@ -1509,7 +1504,6 @@ union security_list_options { int (*file_receive)(struct file *file); int (*file_open)(struct file *file, const struct cred *cred); - int (*task_create)(unsigned long clone_flags); int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); void (*task_free)(struct task_struct *task); int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); @@ -1784,7 +1778,6 @@ struct security_hook_heads { struct list_head file_send_sigiotask; struct list_head file_receive; struct list_head file_open; - struct list_head task_create; struct list_head task_alloc; struct list_head task_free; struct list_head cred_alloc_blank; diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..458e24bea2d4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -318,7 +318,6 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file, const struct cred *cred); -int security_task_create(unsigned long clone_flags); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); @@ -885,11 +884,6 @@ static inline int security_file_open(struct file *file, return 0; } -static inline int security_task_create(unsigned long clone_flags) -{ - return 0; -} - static inline int security_task_alloc(struct task_struct *task, unsigned long clone_flags) { -- cgit From fc60a8b675bd9499c71716d21c238eed5092ddfc Mon Sep 17 00:00:00 2001 From: Andreas Färber Date: Sun, 9 Jul 2017 22:29:42 +0200 Subject: tty: serial: owl: Implement console driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement serial console driver to complement earlycon. Based on LeMaker linux-actions tree. Signed-off-by: Andreas Färber Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index c34a2a3eeff5..38bea3217ead 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -70,6 +70,7 @@ #define PORT_CLPS711X 33 #define PORT_SA1100 34 #define PORT_UART00 35 +#define PORT_OWL 36 #define PORT_21285 37 /* Sparc type numbers. */ -- cgit From 7744ccdbc16f0ac4adae21b3678af93775b3a386 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:03 -0500 Subject: x86/mm: Add Secure Memory Encryption (SME) support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Secure Memory Encryption (SME). This initial support provides a Kconfig entry to build the SME support into the kernel and defines the memory encryption mask that will be used in subsequent patches to mark pages as encrypted. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/a6c34d16caaed3bc3e2d6f0987554275bd291554.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/mem_encrypt.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/linux/mem_encrypt.h (limited to 'include') diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h new file mode 100644 index 000000000000..59769f7287e4 --- /dev/null +++ b/include/linux/mem_encrypt.h @@ -0,0 +1,35 @@ +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __MEM_ENCRYPT_H__ +#define __MEM_ENCRYPT_H__ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_ARCH_HAS_MEM_ENCRYPT + +#include + +#else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */ + +#define sme_me_mask 0UL + +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ + +static inline bool sme_active(void) +{ + return !!sme_me_mask; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __MEM_ENCRYPT_H__ */ -- cgit From 5868f3651fa0dff96a57f94d49247d3ef320ebe2 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:05 -0500 Subject: x86/mm: Add support to enable SME in early boot processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to the early boot code to use Secure Memory Encryption (SME). Since the kernel has been loaded into memory in a decrypted state, encrypt the kernel in place and update the early pagetables with the memory encryption mask so that new pagetable entries will use memory encryption. The routines to set the encryption mask and perform the encryption are stub routines for now with functionality to be added in a later patch. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/e52ad781f085224bf835b3caff9aa3aee6febccb.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/mem_encrypt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 59769f7287e4..570f4fcff13f 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -30,6 +30,11 @@ static inline bool sme_active(void) return !!sme_me_mask; } +static inline unsigned long sme_get_me_mask(void) +{ + return sme_me_mask; +} + #endif /* __ASSEMBLY__ */ #endif /* __MEM_ENCRYPT_H__ */ -- cgit From 21729f81ce8ae76a6995681d40e16f7ce8075db4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:07 -0500 Subject: x86/mm: Provide general kernel support for memory encryption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes to the existing page table macros will allow the SME support to be enabled in a simple fashion with minimal changes to files that use these macros. Since the memory encryption mask will now be part of the regular pagetable macros, we introduce two new macros (_PAGE_TABLE_NOENC and _KERNPG_TABLE_NOENC) to allow for early pagetable creation/initialization without the encryption mask before SME becomes active. Two new pgprot() macros are defined to allow setting or clearing the page encryption mask. The FIXMAP_PAGE_NOCACHE define is introduced for use with MMIO. SME does not support encryption for MMIO areas so this define removes the encryption mask from the page attribute. Two new macros are introduced (__sme_pa() / __sme_pa_nodebug()) to allow creating a physical address with the encryption mask. These are used when working with the cr3 register so that the PGD can be encrypted. The current __va() macro is updated so that the virtual address is generated based off of the physical address without the encryption mask thus allowing the same virtual address to be generated regardless of whether encryption is enabled for that physical location or not. Also, an early initialization function is added for SME. If SME is active, this function: - Updates the early_pmd_flags so that early page faults create mappings with the encryption mask. - Updates the __supported_pte_mask to include the encryption mask. - Updates the protection_map entries to include the encryption mask so that user-space allocations will automatically have the encryption mask applied. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/b36e952c4c39767ae7f0a41cf5345adf27438480.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/asm-generic/pgtable.h | 12 ++++++++++++ include/linux/mem_encrypt.h | 8 ++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 7dfa767dc680..4d7bb98f4134 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -582,6 +582,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ #endif /* CONFIG_MMU */ +/* + * No-op macros that just return the current protection value. Defined here + * because these macros can be used used even if CONFIG_MMU is not defined. + */ +#ifndef pgprot_encrypted +#define pgprot_encrypted(prot) (prot) +#endif + +#ifndef pgprot_decrypted +#define pgprot_decrypted(prot) (prot) +#endif + /* * A facility to provide lazy MMU batching. This allows PTE updates and * page invalidations to be delayed until a call to leave lazy MMU mode diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 570f4fcff13f..1255f09f5e42 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -35,6 +35,14 @@ static inline unsigned long sme_get_me_mask(void) return sme_me_mask; } +/* + * The __sme_set() and __sme_clr() macros are useful for adding or removing + * the encryption mask from a value (e.g. when dealing with pagetable + * entries). + */ +#define __sme_set(x) ((unsigned long)(x) | sme_me_mask) +#define __sme_clr(x) ((unsigned long)(x) & ~sme_me_mask) + #endif /* __ASSEMBLY__ */ #endif /* __MEM_ENCRYPT_H__ */ -- cgit From f88a68facd9a15b94f8c195d9d2c0b30c76c595a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:09 -0500 Subject: x86/mm: Extend early_memremap() support with additional attrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add early_memremap() support to be able to specify encrypted and decrypted mappings with and without write-protection. The use of write-protection is necessary when encrypting data "in place". The write-protect attribute is considered cacheable for loads, but not stores. This implies that the hardware will never give the core a dirty line with this memtype. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/479b5832c30fae3efa7932e48f81794e86397229.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/asm-generic/early_ioremap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h index 734ad4db388c..2edef8d7fa6b 100644 --- a/include/asm-generic/early_ioremap.h +++ b/include/asm-generic/early_ioremap.h @@ -13,6 +13,8 @@ extern void *early_memremap(resource_size_t phys_addr, unsigned long size); extern void *early_memremap_ro(resource_size_t phys_addr, unsigned long size); +extern void *early_memremap_prot(resource_size_t phys_addr, + unsigned long size, unsigned long prot_val); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void early_memunmap(void *addr, unsigned long size); -- cgit From a19d66c56af1c52b8b463bf94d21116ae8c1aa5a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:13 -0500 Subject: efi: Add an EFI table address match function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a function that will determine if a supplied physical address matches the address of an EFI table. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Matt Fleming Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/e1e06441d80f44776df391e0e4cb485b345b7518.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8269bcb8ccf7..8e24f099bd3f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1091,6 +1091,8 @@ static inline bool efi_enabled(int feature) return test_bit(feature, &efi.flags) != 0; } extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); + +extern bool efi_is_table_address(unsigned long phys_addr); #else static inline bool efi_enabled(int feature) { @@ -1104,6 +1106,11 @@ efi_capsule_pending(int *reset_type) { return false; } + +static inline bool efi_is_table_address(unsigned long phys_addr) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit From f99afd08a45fbbd9ce35a7624ffd1d850a1906c0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:14 -0500 Subject: efi: Update efi_mem_type() to return an error rather than 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The efi_mem_type() function currently returns a 0, which maps to EFI_RESERVED_TYPE, if the function is unable to find a memmap entry for the supplied physical address. Returning EFI_RESERVED_TYPE implies that a memmap entry exists, when it doesn't. Instead of returning 0, change the function to return a negative error value when no memmap entry is found. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Matt Fleming Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/7fbf40a9dc414d5da849e1ddcd7f7c1285e4e181.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8e24f099bd3f..4e47f78430be 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -985,7 +985,7 @@ static inline void efi_esrt_init(void) { } extern int efi_config_parse_tables(void *config_tables, int count, int sz, efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); -extern u32 efi_mem_type (unsigned long phys_addr); +extern int efi_mem_type(unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); -- cgit From 8f716c9b5febf6ed0f5fedb7c9407cd0c25b2796 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:16 -0500 Subject: x86/mm: Add support to access boot related data in the clear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boot data (such as EFI related data) is not encrypted when the system is booted because UEFI/BIOS does not run with SME active. In order to access this data properly it needs to be mapped decrypted. Update early_memremap() to provide an arch specific routine to modify the pagetable protection attributes before they are applied to the new mapping. This is used to remove the encryption mask for boot related data. Update memremap() to provide an arch specific routine to determine if RAM remapping is allowed. RAM remapping will cause an encrypted mapping to be generated. By preventing RAM remapping, ioremap_cache() will be used instead, which will provide a decrypted mapping of the boot related data. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Matt Fleming Reviewed-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/81fb6b4117a5df6b9f2eda342f81bbef4b23d2e5.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/io.h b/include/linux/io.h index 2195d9ea4aaa..32e30e8fb9db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -157,6 +157,8 @@ enum { MEMREMAP_WB = 1 << 0, MEMREMAP_WT = 1 << 1, MEMREMAP_WC = 1 << 2, + MEMREMAP_ENC = 1 << 3, + MEMREMAP_DEC = 1 << 4, }; void *memremap(resource_size_t offset, size_t size, unsigned long flags); -- cgit From c7753208a94c73d5beb1e4bd843081d6dc7d4678 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:21 -0500 Subject: x86, swiotlb: Add memory encryption support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since DMA addresses will effectively look like 48-bit addresses when the memory encryption mask is set, SWIOTLB is needed if the DMA mask of the device performing the DMA does not support 48-bits. SWIOTLB will be initialized to create decrypted bounce buffers for use by these devices. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/aa2d29b78ae7d508db8881e46a3215231b9327a7.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 4ee479f2f355..15e7160751a8 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -35,6 +35,7 @@ int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); +extern void __init swiotlb_update_mem_attributes(void); /* * Enumeration for sync targets -- cgit From 648babb7078c6310d2af5b8aa01f086030916968 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:22 -0500 Subject: swiotlb: Add warnings for use of bounce buffers with SME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add warnings to let the user know when bounce buffers are being used for DMA when SME is active. Since the bounce buffers are not in encrypted memory, these notifications are to allow the user to determine some appropriate action - if necessary. Actions can range from utilizing an IOMMU, replacing the device with another device that can support 64-bit DMA, ignoring the message if the device isn't used much, etc. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/d112564053c3f2e86ca634a8d4fa4abc0eb53a6a.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 843ab866e0f4..fce2369ecf82 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * List of possible attributes associated with a DMA mapping. The semantics @@ -548,6 +549,12 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } +static inline void dma_check_mask(struct device *dev, u64 mask) +{ + if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1))) + dev_warn(dev, "SME is active, device will require DMA bounce buffers\n"); +} + static inline int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -564,6 +571,9 @@ static inline int dma_set_mask(struct device *dev, u64 mask) { if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; + + dma_check_mask(dev, mask); + *dev->dma_mask = mask; return 0; } @@ -583,6 +593,9 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) return -EIO; + + dma_check_mask(dev, mask); + dev->coherent_dma_mask = mask; return 0; } -- cgit From bba4ed011a52d494aa7ef5e08cf226709bbf3f60 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:28 -0500 Subject: x86/mm, kexec: Allow kexec to be used with SME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide support so that kexec can be used to boot a kernel when SME is enabled. Support is needed to allocate pages for kexec without encryption. This is needed in order to be able to reboot in the kernel in the same manner as originally booted. Additionally, when shutting down all of the CPUs we need to be sure to flush the caches and then halt. This is needed when booting from a state where SME was not active into a state where SME is active (or vice-versa). Without these steps, it is possible for cache lines to exist for the same physical location but tagged both with and without the encryption bit. This can cause random memory corruption when caches are flushed depending on which cacheline is written last. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/b95ff075db3e7cd545313f2fb609a49619a09625.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/kexec.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index dd056fab9e35..2b7590f5483a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -327,6 +327,14 @@ static inline void *boot_phys_to_virt(unsigned long entry) return phys_to_virt(boot_phys_to_phys(entry)); } +#ifndef arch_kexec_post_alloc_pages +static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { return 0; } +#endif + +#ifndef arch_kexec_pre_free_pages +static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } +#endif + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; -- cgit From 77af0ae44e4a74f58dd741babdacec32fc8042b1 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 27 Jun 2017 08:58:04 -0500 Subject: crypto: ccp - Fix some line spacing Add/remove blank lines as appropriate. Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 3285c944194a..c03ee844a99d 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -20,7 +20,6 @@ #include #include - struct ccp_device; struct ccp_cmd; -- cgit From 66d3994c673d211a9ce5da3d7576d90c8e0cd377 Mon Sep 17 00:00:00 2001 From: Tudor-Dan Ambarus Date: Wed, 5 Jul 2017 13:07:58 +0300 Subject: crypto: kpp - add get/set_flags helpers These helpers will be used for fallbacks to kpp software implementations. Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- include/crypto/kpp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h index 2133d17b7156..1bde0a6514fa 100644 --- a/include/crypto/kpp.h +++ b/include/crypto/kpp.h @@ -145,6 +145,16 @@ static inline struct crypto_kpp *crypto_kpp_reqtfm(struct kpp_request *req) return __crypto_kpp_tfm(req->base.tfm); } +static inline u32 crypto_kpp_get_flags(struct crypto_kpp *tfm) +{ + return crypto_tfm_get_flags(crypto_kpp_tfm(tfm)); +} + +static inline void crypto_kpp_set_flags(struct crypto_kpp *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_kpp_tfm(tfm), flags); +} + /** * crypto_free_kpp() - free KPP tfm handle * -- cgit From 720419f01832f7e697cb80480b97b2a1e96045cd Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 6 Jul 2017 09:59:14 -0500 Subject: crypto: ccp - Introduce the AMD Secure Processor device The CCP device is part of the AMD Secure Processor. In order to expand the usage of the AMD Secure Processor, create a framework that allows functional components of the AMD Secure Processor to be initialized and handled appropriately. Signed-off-by: Brijesh Singh Acked-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c03ee844a99d..3fd9a6dac344 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -23,8 +23,7 @@ struct ccp_device; struct ccp_cmd; -#if defined(CONFIG_CRYPTO_DEV_CCP_DD) || \ - defined(CONFIG_CRYPTO_DEV_CCP_DD_MODULE) +#if defined(CONFIG_CRYPTO_DEV_SP_CCP) /** * ccp_present - check if a CCP device is present @@ -70,7 +69,7 @@ unsigned int ccp_version(void); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd); -#else /* CONFIG_CRYPTO_DEV_CCP_DD is not enabled */ +#else /* CONFIG_CRYPTO_DEV_CCP_SP_DEV is not enabled */ static inline int ccp_present(void) { @@ -87,7 +86,7 @@ static inline int ccp_enqueue_cmd(struct ccp_cmd *cmd) return -ENODEV; } -#endif /* CONFIG_CRYPTO_DEV_CCP_DD */ +#endif /* CONFIG_CRYPTO_DEV_SP_CCP */ /***** AES engine *****/ -- cgit From f384b352cbf0310fd20c379c4710408c70e769b6 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Mon, 26 Jun 2017 15:10:00 +0200 Subject: mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables This patch adds support to the JESD216 rev B standard and parses the SFDP tables to dynamically initialize the 'struct spi_nor_flash_parameter'. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 55faa2f07cca..0df3638ff0b8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -41,6 +41,8 @@ #define SPINOR_OP_WREN 0x06 /* Write enable */ #define SPINOR_OP_RDSR 0x05 /* Read status register */ #define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */ +#define SPINOR_OP_RDSR2 0x3f /* Read status register 2 */ +#define SPINOR_OP_WRSR2 0x3e /* Write status register 2 */ #define SPINOR_OP_READ 0x03 /* Read data bytes (low frequency) */ #define SPINOR_OP_READ_FAST 0x0b /* Read data bytes (high frequency) */ #define SPINOR_OP_READ_1_1_2 0x3b /* Read data bytes (Dual Output SPI) */ @@ -56,6 +58,7 @@ #define SPINOR_OP_CHIP_ERASE 0xc7 /* Erase whole flash chip */ #define SPINOR_OP_SE 0xd8 /* Sector erase (usually 64KiB) */ #define SPINOR_OP_RDID 0x9f /* Read JEDEC ID */ +#define SPINOR_OP_RDSFDP 0x5a /* Read SFDP */ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ #define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ @@ -128,6 +131,9 @@ /* Configuration Register bits. */ #define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ +/* Status Register 2 bits. */ +#define SR2_QUAD_EN_BIT7 BIT(7) + /* Supported SPI protocols */ #define SNOR_PROTO_INST_MASK GENMASK(23, 16) #define SNOR_PROTO_INST_SHIFT 16 -- cgit From bb789e03f2a140a80ddc7d18b7ef4d626c2a71d8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:34 -0300 Subject: media: cec: improve transmit timeout logging Kernel logging messes up the upcoming low-level CEC monitoring support which is very time-sensitive. So change the debug level of this message but keep a counter that is shown in the debugfs status log. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 56643b27e4b8..e32b0e1a81a4 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -174,6 +174,8 @@ struct cec_adapter { bool passthrough; struct cec_log_addrs log_addrs; + u32 tx_timeouts; + #ifdef CONFIG_CEC_NOTIFIER struct cec_notifier *notifier; #endif -- cgit From 0861ad14c6cfc9dc8bcde44bc23a7a355167eb9f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:35 -0300 Subject: media: cec: add *_ts variants for transmit_done/received_msg Currently the transmit_(attempt_)done and received_msg functions set the timestamp themselves. For the upcoming low-level pin API we need to pass this as an argument instead. So make _ts variants that allow the caller to specify the timestamp. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index e32b0e1a81a4..e1e60dbb66c3 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -228,15 +228,39 @@ int cec_transmit_msg(struct cec_adapter *adap, struct cec_msg *msg, bool block); /* Called by the adapter */ -void cec_transmit_done(struct cec_adapter *adap, u8 status, u8 arb_lost_cnt, - u8 nack_cnt, u8 low_drive_cnt, u8 error_cnt); +void cec_transmit_done_ts(struct cec_adapter *adap, u8 status, + u8 arb_lost_cnt, u8 nack_cnt, u8 low_drive_cnt, + u8 error_cnt, ktime_t ts); + +static inline void cec_transmit_done(struct cec_adapter *adap, u8 status, + u8 arb_lost_cnt, u8 nack_cnt, + u8 low_drive_cnt, u8 error_cnt) +{ + cec_transmit_done_ts(adap, status, arb_lost_cnt, nack_cnt, + low_drive_cnt, error_cnt, ktime_get()); +} /* * Simplified version of cec_transmit_done for hardware that doesn't retry * failed transmits. So this is always just one attempt in which case * the status is sufficient. */ -void cec_transmit_attempt_done(struct cec_adapter *adap, u8 status); -void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg); +void cec_transmit_attempt_done_ts(struct cec_adapter *adap, + u8 status, ktime_t ts); + +static inline void cec_transmit_attempt_done(struct cec_adapter *adap, + u8 status) +{ + cec_transmit_attempt_done_ts(adap, status, ktime_get()); +} + +void cec_received_msg_ts(struct cec_adapter *adap, + struct cec_msg *msg, ktime_t ts); + +static inline void cec_received_msg(struct cec_adapter *adap, + struct cec_msg *msg) +{ + cec_received_msg_ts(adap, msg, ktime_get()); +} /** * cec_get_edid_phys_addr() - find and return the physical address -- cgit From e6259b5f7ab74e399f838cb1abdc12f3e28668f4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:36 -0300 Subject: media: cec: add adap_free op This is needed for CEC adapters that allocate resources that have to be freed before the cec_adapter is deleted. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index e1e60dbb66c3..37768203572d 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -114,6 +114,7 @@ struct cec_adap_ops { int (*adap_transmit)(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *msg); void (*adap_status)(struct cec_adapter *adap, struct seq_file *file); + void (*adap_free)(struct cec_adapter *adap); /* High-level CEC message callback */ int (*received)(struct cec_adapter *adap, struct cec_msg *msg); -- cgit From 6303d97873d340e89acdef12effb66f88d79836f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:38 -0300 Subject: media: linux/cec.h: add pin monitoring API support Add support for low-level CEC pin monitoring. This adds a new monitor mode, a new capability and two new events. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index 44579a24f95d..bba73f33c8aa 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -318,6 +318,7 @@ static inline int cec_is_unconfigured(__u16 log_addr_mask) #define CEC_MODE_FOLLOWER (0x1 << 4) #define CEC_MODE_EXCL_FOLLOWER (0x2 << 4) #define CEC_MODE_EXCL_FOLLOWER_PASSTHRU (0x3 << 4) +#define CEC_MODE_MONITOR_PIN (0xd << 4) #define CEC_MODE_MONITOR (0xe << 4) #define CEC_MODE_MONITOR_ALL (0xf << 4) #define CEC_MODE_FOLLOWER_MSK 0xf0 @@ -338,6 +339,8 @@ static inline int cec_is_unconfigured(__u16 log_addr_mask) #define CEC_CAP_MONITOR_ALL (1 << 5) /* Hardware can use CEC only if the HDMI HPD pin is high. */ #define CEC_CAP_NEEDS_HPD (1 << 6) +/* Hardware can monitor CEC pin transitions */ +#define CEC_CAP_MONITOR_PIN (1 << 7) /** * struct cec_caps - CEC capabilities structure. @@ -405,6 +408,8 @@ struct cec_log_addrs { * didn't empty the message queue in time */ #define CEC_EVENT_LOST_MSGS 2 +#define CEC_EVENT_PIN_LOW 3 +#define CEC_EVENT_PIN_HIGH 4 #define CEC_EVENT_FL_INITIAL_STATE (1 << 0) -- cgit From 6b2bbb08747a56dcf4ee33606a06025eca571260 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:39 -0300 Subject: media: cec: rework the cec event handling Event handling was always fairly simplistic since there were only two events. With the addition of pin events this needed to be redesigned. The state_change and lost_msgs events are now core events with the guarantee that the last state is always available. The new pin events are a queue of events (up to 64 for each event) and the oldest event will be dropped if the application cannot keep up. Lost events are marked with a new event flag. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 14 +++++++++++--- include/uapi/linux/cec.h | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 37768203572d..6cc862af74e5 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -81,7 +81,13 @@ struct cec_msg_entry { struct cec_msg msg; }; -#define CEC_NUM_EVENTS CEC_EVENT_LOST_MSGS +struct cec_event_entry { + struct list_head list; + struct cec_event ev; +}; + +#define CEC_NUM_CORE_EVENTS 2 +#define CEC_NUM_EVENTS CEC_EVENT_PIN_HIGH struct cec_fh { struct list_head list; @@ -92,9 +98,11 @@ struct cec_fh { /* Events */ wait_queue_head_t wait; - unsigned int pending_events; - struct cec_event events[CEC_NUM_EVENTS]; struct mutex lock; + struct list_head events[CEC_NUM_EVENTS]; /* queued events */ + u8 queued_events[CEC_NUM_EVENTS]; + unsigned int total_queued_events; + struct cec_event_entry core_events[CEC_NUM_CORE_EVENTS]; struct list_head msgs; /* queued messages */ unsigned int queued_msgs; }; diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index bba73f33c8aa..d87a67b0bb06 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -412,6 +412,7 @@ struct cec_log_addrs { #define CEC_EVENT_PIN_HIGH 4 #define CEC_EVENT_FL_INITIAL_STATE (1 << 0) +#define CEC_EVENT_FL_DROPPED_EVENTS (1 << 1) /** * struct cec_event_state_change - used when the CEC adapter changes state. @@ -424,7 +425,7 @@ struct cec_event_state_change { }; /** - * struct cec_event_lost_msgs - tells you how many messages were lost due. + * struct cec_event_lost_msgs - tells you how many messages were lost. * @lost_msgs: how many messages were lost. */ struct cec_event_lost_msgs { -- cgit From b8d62f50b1df2571afcf47107bbbe9cd60f8aafd Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:41 -0300 Subject: media: cec: add core support for low-level CEC pin monitoring Add support for the new MONITOR_PIN mode. Add the cec_pin_event function that the CEC pin code will call to queue pin change events. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 6cc862af74e5..d983960b37ad 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -177,6 +177,7 @@ struct cec_adapter { bool is_configuring; bool is_configured; u32 monitor_all_cnt; + u32 monitor_pin_cnt; u32 follower_cnt; struct cec_fh *cec_follower; struct cec_fh *cec_initiator; @@ -271,6 +272,16 @@ static inline void cec_received_msg(struct cec_adapter *adap, cec_received_msg_ts(adap, msg, ktime_get()); } +/** + * cec_queue_pin_event() - queue a pin event with a given timestamp. + * + * @adap: pointer to the cec adapter + * @is_high: when true the pin is high, otherwise it is low + * @ts: the timestamp for this event + * + */ +void cec_queue_pin_event(struct cec_adapter *adap, bool is_high, ktime_t ts); + /** * cec_get_edid_phys_addr() - find and return the physical address * -- cgit From ea5c8ef296681b53480ebeeffd06083bb60e693d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 11 Jul 2017 03:30:42 -0300 Subject: media: cec-pin: add low-level pin hardware support Add support for CEC hardware that relies on low-level pin polling or GPIO interrupts. One example is the Allwinner SoC. But any GPIO-based CEC implementation can use this as well. A GPIO implementation is very suitable as well for debugging: it can use interrupts to detect state changes and report it. Userspace can then verify if the bus traffic is correct. This also makes error injection possible. The disadvantage is that it is hard to get the timings right since linux isn't a hard realtime system. In general on an idle system it works quite well, but under load the timer will miss its mark every so often. The debugfs file /sys/kernel/debug/cec/cecX/status gives some statistics with respect to the timer overruns. When the adapter is unconfigured and the low-level driver supports interrupts, then the interrupt will be used to detect changes. This should be quite accurate. But when the adapter is configured a hrtimer has to be used. The hrtimer implements a state machine where for each state the code will read the bus or drive the bus and go on to the next state. It will re-arm the timer with a delay based on the next state. Signed-off-by: Hans Verkuil Reviewed-by: Maxime Ripard Signed-off-by: Mauro Carvalho Chehab --- include/media/cec-pin.h | 183 ++++++++++++++++++++++++++++++++++++++++++++++++ include/media/cec.h | 4 ++ 2 files changed, 187 insertions(+) create mode 100644 include/media/cec-pin.h (limited to 'include') diff --git a/include/media/cec-pin.h b/include/media/cec-pin.h new file mode 100644 index 000000000000..44b82d29d480 --- /dev/null +++ b/include/media/cec-pin.h @@ -0,0 +1,183 @@ +/* + * cec-pin.h - low-level CEC pin control + * + * Copyright 2017 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef LINUX_CEC_PIN_H +#define LINUX_CEC_PIN_H + +#include +#include +#include + +enum cec_pin_state { + /* CEC is off */ + CEC_ST_OFF, + /* CEC is idle, waiting for Rx or Tx */ + CEC_ST_IDLE, + + /* Tx states */ + + /* Pending Tx, waiting for Signal Free Time to expire */ + CEC_ST_TX_WAIT, + /* Low-drive was detected, wait for bus to go high */ + CEC_ST_TX_WAIT_FOR_HIGH, + /* Drive CEC low for the start bit */ + CEC_ST_TX_START_BIT_LOW, + /* Drive CEC high for the start bit */ + CEC_ST_TX_START_BIT_HIGH, + /* Drive CEC low for the 0 bit */ + CEC_ST_TX_DATA_BIT_0_LOW, + /* Drive CEC high for the 0 bit */ + CEC_ST_TX_DATA_BIT_0_HIGH, + /* Drive CEC low for the 1 bit */ + CEC_ST_TX_DATA_BIT_1_LOW, + /* Drive CEC high for the 1 bit */ + CEC_ST_TX_DATA_BIT_1_HIGH, + /* + * Wait for start of sample time to check for Ack bit or first + * four initiator bits to check for Arbitration Lost. + */ + CEC_ST_TX_DATA_BIT_1_HIGH_PRE_SAMPLE, + /* Wait for end of bit period after sampling */ + CEC_ST_TX_DATA_BIT_1_HIGH_POST_SAMPLE, + + /* Rx states */ + + /* Start bit low detected */ + CEC_ST_RX_START_BIT_LOW, + /* Start bit high detected */ + CEC_ST_RX_START_BIT_HIGH, + /* Wait for bit sample time */ + CEC_ST_RX_DATA_SAMPLE, + /* Wait for earliest end of bit period after sampling */ + CEC_ST_RX_DATA_POST_SAMPLE, + /* Wait for CEC to go high (i.e. end of bit period */ + CEC_ST_RX_DATA_HIGH, + /* Drive CEC low to send 0 Ack bit */ + CEC_ST_RX_ACK_LOW, + /* End of 0 Ack time, wait for earliest end of bit period */ + CEC_ST_RX_ACK_LOW_POST, + /* Wait for CEC to go high (i.e. end of bit period */ + CEC_ST_RX_ACK_HIGH_POST, + /* Wait for earliest end of bit period and end of message */ + CEC_ST_RX_ACK_FINISH, + + /* Start low drive */ + CEC_ST_LOW_DRIVE, + /* Monitor pin using interrupts */ + CEC_ST_RX_IRQ, + + /* Total number of pin states */ + CEC_PIN_STATES +}; + +/** + * struct cec_pin_ops - low-level CEC pin operations + * @read: read the CEC pin. Return true if high, false if low. + * @low: drive the CEC pin low. + * @high: stop driving the CEC pin. The pull-up will drive the pin + * high, unless someone else is driving the pin low. + * @enable_irq: optional, enable the interrupt to detect pin voltage changes. + * @disable_irq: optional, disable the interrupt. + * @free: optional. Free any allocated resources. Called when the + * adapter is deleted. + * @status: optional, log status information. + * + * These operations are used by the cec pin framework to manipulate + * the CEC pin. + */ +struct cec_pin_ops { + bool (*read)(struct cec_adapter *adap); + void (*low)(struct cec_adapter *adap); + void (*high)(struct cec_adapter *adap); + bool (*enable_irq)(struct cec_adapter *adap); + void (*disable_irq)(struct cec_adapter *adap); + void (*free)(struct cec_adapter *adap); + void (*status)(struct cec_adapter *adap, struct seq_file *file); +}; + +#define CEC_NUM_PIN_EVENTS 128 + +struct cec_pin { + struct cec_adapter *adap; + const struct cec_pin_ops *ops; + struct task_struct *kthread; + wait_queue_head_t kthread_waitq; + struct hrtimer timer; + ktime_t ts; + unsigned int wait_usecs; + u16 la_mask; + bool enabled; + bool monitor_all; + bool cur_value; + bool rx_eom; + bool enable_irq_failed; + enum cec_pin_state state; + struct cec_msg tx_msg; + u32 tx_bit; + bool tx_nacked; + u32 tx_signal_free_time; + struct cec_msg rx_msg; + u32 rx_bit; + + struct cec_msg work_rx_msg; + u8 work_tx_status; + bool work_enable_irq; + ktime_t work_tx_ts; + atomic_t work_pin_events; + unsigned int work_pin_events_wr; + unsigned int work_pin_events_rd; + ktime_t work_pin_ts[CEC_NUM_PIN_EVENTS]; + bool work_pin_is_high[CEC_NUM_PIN_EVENTS]; + ktime_t timer_ts; + u32 timer_cnt; + u32 timer_100ms_overruns; + u32 timer_300ms_overruns; + u32 timer_max_overrun; + u32 timer_sum_overrun; +}; + +/** + * cec_pin_changed() - update pin state from interrupt + * + * @adap: pointer to the cec adapter + * @value: when true the pin is high, otherwise it is low + * + * If changes of the CEC voltage are detected via an interrupt, then + * cec_pin_changed is called from the interrupt with the new value. + */ +void cec_pin_changed(struct cec_adapter *adap, bool value); + +/** + * cec_pin_allocate_adapter() - allocate a pin-based cec adapter + * + * @pin_ops: low-level pin operations + * @priv: will be stored in adap->priv and can be used by the adapter ops. + * Use cec_get_drvdata(adap) to get the priv pointer. + * @name: the name of the CEC adapter. Note: this name will be copied. + * @caps: capabilities of the CEC adapter. This will be ORed with + * CEC_CAP_MONITOR_ALL and CEC_CAP_MONITOR_PIN. + * + * Allocate a cec adapter using the cec pin framework. + * + * Return: a pointer to the cec adapter or an error pointer + */ +struct cec_adapter *cec_pin_allocate_adapter(const struct cec_pin_ops *pin_ops, + void *priv, const char *name, u32 caps); + +#endif diff --git a/include/media/cec.h b/include/media/cec.h index d983960b37ad..f9cab1a9f912 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -61,6 +61,7 @@ struct cec_devnode { struct cec_adapter; struct cec_data; +struct cec_pin; struct cec_data { struct list_head list; @@ -189,6 +190,9 @@ struct cec_adapter { #ifdef CONFIG_CEC_NOTIFIER struct cec_notifier *notifier; #endif +#ifdef CONFIG_CEC_PIN + struct cec_pin *pin; +#endif struct dentry *cec_dir; struct dentry *status_file; -- cgit From 5150418593015d280dab321da1fe3cea4ed3b693 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Jul 2017 04:07:23 -0300 Subject: media: cec: move cec_register_cec_notifier to cec-notifier.h The cec_register_cec_notifier function was in media/cec.h, but it has to be in cec-notifier.h. While we are at it, also document it and add a stub function for when the notifier is disabled or the CEC core code is unreachable. Based on an earlier patch from Jose Abreu . Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec-notifier.h | 12 ++++++++++++ include/media/cec.h | 5 ----- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h index 298f996969df..73bc98b90afc 100644 --- a/include/media/cec-notifier.h +++ b/include/media/cec-notifier.h @@ -86,6 +86,14 @@ void cec_notifier_register(struct cec_notifier *n, */ void cec_notifier_unregister(struct cec_notifier *n); +/** + * cec_register_cec_notifier - register the notifier with the cec adapter. + * @adap: the CEC adapter + * @notifier: the CEC notifier + */ +void cec_register_cec_notifier(struct cec_adapter *adap, + struct cec_notifier *notifier); + #else static inline struct cec_notifier *cec_notifier_get(struct device *dev) { @@ -116,6 +124,10 @@ static inline void cec_notifier_unregister(struct cec_notifier *n) { } +static inline void cec_register_cec_notifier(struct cec_adapter *adap, + struct cec_notifier *notifier) +{ +} #endif #endif diff --git a/include/media/cec.h b/include/media/cec.h index f9cab1a9f912..224a6e225c52 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -361,11 +361,6 @@ u16 cec_phys_addr_for_input(u16 phys_addr, u8 input); */ int cec_phys_addr_validate(u16 phys_addr, u16 *parent, u16 *port); -#ifdef CONFIG_CEC_NOTIFIER -void cec_register_cec_notifier(struct cec_adapter *adap, - struct cec_notifier *notifier); -#endif - #else static inline int cec_register_adapter(struct cec_adapter *adap, -- cgit From ff60dcedbd99395c3355bc00fd302c31c96c9b92 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 15 Jun 2017 17:06:27 +0200 Subject: pinctrl: samsung: dt-bindings: Use better name for external interrupt function On Exynos, 0xf is always used as value of external interrupt in pin mux function thus a more descriptive macro name can be used. Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/pinctrl/samsung.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index b7aa3646208b..ceb672305f59 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -66,7 +66,8 @@ #define EXYNOS_PIN_FUNC_4 4 #define EXYNOS_PIN_FUNC_5 5 #define EXYNOS_PIN_FUNC_6 6 -#define EXYNOS_PIN_FUNC_F 0xf +#define EXYNOS_PIN_FUNC_EINT 0xf +#define EXYNOS_PIN_FUNC_F EXYNOS_PIN_FUNC_EINT /* Drive strengths for Exynos7 FSYS1 block */ #define EXYNOS7_FSYS1_PIN_DRV_LV1 0 -- cgit From 09c7570480f7544ffbf8e6db365208b0b0c154c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:26 +0200 Subject: xfrm: remove flow cache After rcu conversions performance degradation in forward tests isn't that noticeable anymore. See next patch for some numbers. A followup patcg could then also remove genid from the policies as we do not cache bundles anymore. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/flow.h | 34 ---------------------------------- include/net/flowcache.h | 25 ------------------------- include/net/netns/xfrm.h | 11 ----------- include/net/xfrm.h | 8 -------- 4 files changed, 78 deletions(-) delete mode 100644 include/net/flowcache.h (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index bae198b3039e..f3dc61b29bb5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -218,40 +218,6 @@ static inline unsigned int flow_key_size(u16 family) return 0; } -#define FLOW_DIR_IN 0 -#define FLOW_DIR_OUT 1 -#define FLOW_DIR_FWD 2 - -struct net; -struct sock; -struct flow_cache_ops; - -struct flow_cache_object { - const struct flow_cache_ops *ops; -}; - -struct flow_cache_ops { - struct flow_cache_object *(*get)(struct flow_cache_object *); - int (*check)(struct flow_cache_object *); - void (*delete)(struct flow_cache_object *); -}; - -typedef struct flow_cache_object *(*flow_resolve_t)( - struct net *net, const struct flowi *key, u16 family, - u8 dir, struct flow_cache_object *oldobj, void *ctx); - -struct flow_cache_object *flow_cache_lookup(struct net *net, - const struct flowi *key, u16 family, - u8 dir, flow_resolve_t resolver, - void *ctx); -int flow_cache_init(struct net *net); -void flow_cache_fini(struct net *net); -void flow_cache_hp_init(void); - -void flow_cache_flush(struct net *net); -void flow_cache_flush_deferred(struct net *net); -extern atomic_t flow_cache_genid; - __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) diff --git a/include/net/flowcache.h b/include/net/flowcache.h deleted file mode 100644 index 51eb971e8973..000000000000 --- a/include/net/flowcache.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _NET_FLOWCACHE_H -#define _NET_FLOWCACHE_H - -#include -#include -#include -#include - -struct flow_cache_percpu { - struct hlist_head *hash_table; - unsigned int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct hlist_node node; - unsigned int low_watermark; - unsigned int high_watermark; - struct timer_list rnd_timer; -}; -#endif /* _NET_FLOWCACHE_H */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 27bb9633c69d..611521646dd4 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,7 +6,6 @@ #include #include #include -#include struct ctl_table_header; @@ -73,16 +72,6 @@ struct netns_xfrm { spinlock_t xfrm_state_lock; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; - - /* flow cache part */ - struct flow_cache flow_cache_global; - atomic_t flow_cache_genid; - struct list_head flow_cache_gc_list; - atomic_t flow_cache_gc_count; - spinlock_t flow_cache_gc_lock; - struct work_struct flow_cache_gc_work; - struct work_struct flow_cache_flush_work; - struct mutex flow_flush_sem; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c0916ab18d32..e0feba2ce76a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -563,7 +563,6 @@ struct xfrm_policy { refcount_t refcnt; struct timer_list timer; - struct flow_cache_object flo; atomic_t genid; u32 priority; u32 index; @@ -978,7 +977,6 @@ struct xfrm_dst { struct rt6_info rt6; } u; struct dst_entry *route; - struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; u32 xfrm_genid; @@ -1226,9 +1224,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } } -void xfrm_garbage_collect(struct net *net); -void xfrm_garbage_collect_deferred(struct net *net); - #else static inline void xfrm_sk_free_policy(struct sock *sk) {} @@ -1263,9 +1258,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir, { return 1; } -static inline void xfrm_garbage_collect(struct net *net) -{ -} #endif static __inline__ -- cgit From ec30d78c14a813db39a647b6a348b4286ba4abf5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Jul 2017 13:57:27 +0200 Subject: xfrm: add xdst pcpu cache retain last used xfrm_dst in a pcpu cache. On next request, reuse this dst if the policies are the same. The cache will not help with strict RR workloads as there is no hit. The cache packet-path part is reasonably small, the notifier part is needed so we do not add long hangs when a device is dismantled but some pcpu xdst still holds a reference, there are also calls to the flush operation when userspace deletes SAs so modules can be removed (there is no hit. We need to run the dst_release on the correct cpu to avoid races with packet path. This is done by adding a work_struct for each cpu and then doing the actual test/release on each affected cpu via schedule_work_on(). Test results using 4 network namespaces and null encryption: ns1 ns2 -> ns3 -> ns4 netperf -> xfrm/null enc -> xfrm/null dec -> netserver what TCP_STREAM UDP_STREAM UDP_RR Flow cache: 14644.61 294.35 327231.64 No flow cache: 14349.81 242.64 202301.72 Pcpu cache: 14629.70 292.21 205595.22 UDP tests used 64byte packets, tests ran for one minute each, value is average over ten iterations. 'Flow cache' is 'net-next', 'No flow cache' is net-next plus this series but without this patch. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0feba2ce76a..afb4929d7232 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -317,6 +317,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); +void xfrm_policy_cache_flush(void); void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; -- cgit From 7375ae3a0b79ea072f4c672039f08f5db633b9e1 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 Jul 2017 16:10:34 -0500 Subject: compiler-gcc.h: Introduce __nostackprotector function attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new function attribute, __nostackprotector, that can used to turn off stack protection on a per function basis. Signed-off-by: Tom Lendacky Reviewed-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brijesh Singh Cc: Dave Young Cc: Dmitry Vyukov Cc: Jonathan Corbet Cc: Konrad Rzeszutek Wilk Cc: Larry Woodman Cc: Linus Torvalds Cc: Matt Fleming Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Toshimitsu Kani Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/0576fd5c74440ad0250f16ac6609ecf587812456.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 2 ++ include/linux/compiler.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cd4bbe8242bd..c28cedde973f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -166,6 +166,8 @@ #if GCC_VERSION >= 40100 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0) + +#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) #endif #if GCC_VERSION >= 40300 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 219f82f3ec1a..3f8c88e29a46 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -470,6 +470,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define __visible #endif +#ifndef __nostackprotector +# define __nostackprotector +#endif + /* * Assume alignment of return value. */ -- cgit From eb0baf8a0d9259d168523b8e7c436b55ade7c546 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 18 Jul 2017 20:13:09 +0800 Subject: perf/core: Define the common branch type classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is often useful to know the branch types while analyzing branch data. For example, a call is very different from a conditional branch. Currently we have to look it up in binary while the binary may later not be available and even the binary is available but user has to take some time. It is very useful for user to check it directly in perf report. Perf already has support for disassembling the branch instruction to get the x86 branch type. To keep consistent on kernel and userspace and make the classification more common, the patch adds the common branch type classification in perf_event.h. The patch only defines a minimum but most common set of branch types. PERF_BR_UNKNOWN : unknown PERF_BR_COND :conditional PERF_BR_UNCOND : unconditional PERF_BR_IND : indirect PERF_BR_CALL : function call PERF_BR_IND_CALL : indirect function call PERF_BR_RET : function return PERF_BR_SYSCALL : syscall PERF_BR_SYSRET : syscall return PERF_BR_COND_CALL : conditional function call PERF_BR_COND_RET : conditional function return The patch also adds a new field type (4 bits) in perf_branch_entry to record the branch type. Since the disassembling of branch instruction needs some overhead, a new PERF_SAMPLE_BRANCH_TYPE_SAVE is introduced to indicate if it needs to disassemble the branch instruction and record the branch type. Change log: v10: Not changed. v9: Not changed. v8: Change PERF_BR_NONE to PERF_BR_UNKNOWN. No other change. v7: Just keep the most common branch types. Others are removed. v6: Not changed. v5: Not changed. The v5 patch series just change the userspace. v4: Comparing to previous version, the major changes are: 1. Remove the PERF_BR_JCC_FWD/PERF_BR_JCC_BWD, they will be computed later in userspace. 2. Remove the "cross" field in perf_branch_entry. The cross page computing will be done later in userspace. Signed-off-by: Yao Jin Acked-by: Jiri Olsa Acked-by: Michael Ellerman Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Link: http://lkml.kernel.org/r/1500379995-6449-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b1c0b187acfe..642db5fa3286 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ + PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -198,9 +200,30 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = + 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; +/* + * Common flow change classification + */ +enum { + PERF_BR_UNKNOWN = 0, /* unknown */ + PERF_BR_COND = 1, /* conditional */ + PERF_BR_UNCOND = 2, /* unconditional */ + PERF_BR_IND = 3, /* indirect */ + PERF_BR_CALL = 4, /* function call */ + PERF_BR_IND_CALL = 5, /* indirect function call */ + PERF_BR_RET = 6, /* function return */ + PERF_BR_SYSCALL = 7, /* syscall */ + PERF_BR_SYSRET = 8, /* syscall return */ + PERF_BR_COND_CALL = 9, /* conditional function call */ + PERF_BR_COND_RET = 10, /* conditional function return */ + PERF_BR_MAX, +}; + #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ PERF_SAMPLE_BRANCH_KERNEL|\ @@ -1015,6 +1038,7 @@ union perf_mem_data_src { * in_tx: running in a hardware transaction * abort: aborting a hardware transaction * cycles: cycles from last branch (or 0 if not supported) + * type: branch type */ struct perf_branch_entry { __u64 from; @@ -1024,7 +1048,8 @@ struct perf_branch_entry { in_tx:1, /* in transaction */ abort:1, /* transaction abort */ cycles:16, /* cycle count to last branch */ - reserved:44; + type:4, /* branch type */ + reserved:40; }; #endif /* _UAPI_LINUX_PERF_EVENT_H */ -- cgit From 17c82e206d2a3cd876b64921c59116f1ecdce6ad Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Mon, 22 May 2017 16:53:25 +0530 Subject: reset: Add APIs to manage array of resets Many devices may want to request a bunch of resets and control them. So it's better to manage them as an array. Add APIs to _get() an array of reset_control, reusing the _assert(), _deassert(), and _reset() APIs for single reset controls. Since reset controls already may control multiple reset lines with a single hardware bit, from the user perspective, reset control arrays are not at all different from single reset controls. Note that these APIs don't guarantee that the reset lines managed in the array are handled in any particular order. Cc: Felipe Balbi Cc: Jon Hunter Signed-off-by: Vivek Gautam [p.zabel@pengutronix.de: changed API to hide reset control arrays behind struct reset_control] Signed-off-by: Philipp Zabel --- include/linux/reset.h | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'include') diff --git a/include/linux/reset.h b/include/linux/reset.h index 13d8681210d5..56463f37f3e6 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -25,6 +25,11 @@ struct reset_control *__devm_reset_control_get(struct device *dev, int __must_check device_reset(struct device *dev); +struct reset_control *devm_reset_control_array_get(struct device *dev, + bool shared, bool optional); +struct reset_control *of_reset_control_array_get(struct device_node *np, + bool shared, bool optional); + static inline int device_reset_optional(struct device *dev) { return device_reset(dev); @@ -89,6 +94,18 @@ static inline struct reset_control *__devm_reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline struct reset_control * +devm_reset_control_array_get(struct device *dev, bool shared, bool optional) +{ + return optional ? NULL : ERR_PTR(-ENOTSUPP); +} + +static inline struct reset_control * +of_reset_control_array_get(struct device_node *np, bool shared, bool optional) +{ + return optional ? NULL : ERR_PTR(-ENOTSUPP); +} + #endif /* CONFIG_RESET_CONTROLLER */ /** @@ -374,4 +391,55 @@ static inline struct reset_control *devm_reset_control_get_by_index( { return devm_reset_control_get_exclusive_by_index(dev, index); } + +/* + * APIs to manage a list of reset controllers + */ +static inline struct reset_control * +devm_reset_control_array_get_exclusive(struct device *dev) +{ + return devm_reset_control_array_get(dev, false, false); +} + +static inline struct reset_control * +devm_reset_control_array_get_shared(struct device *dev) +{ + return devm_reset_control_array_get(dev, true, false); +} + +static inline struct reset_control * +devm_reset_control_array_get_optional_exclusive(struct device *dev) +{ + return devm_reset_control_array_get(dev, false, true); +} + +static inline struct reset_control * +devm_reset_control_array_get_optional_shared(struct device *dev) +{ + return devm_reset_control_array_get(dev, true, true); +} + +static inline struct reset_control * +of_reset_control_array_get_exclusive(struct device_node *node) +{ + return of_reset_control_array_get(node, false, false); +} + +static inline struct reset_control * +of_reset_control_array_get_shared(struct device_node *node) +{ + return of_reset_control_array_get(node, true, false); +} + +static inline struct reset_control * +of_reset_control_array_get_optional_exclusive(struct device_node *node) +{ + return of_reset_control_array_get(node, false, true); +} + +static inline struct reset_control * +of_reset_control_array_get_optional_shared(struct device_node *node) +{ + return of_reset_control_array_get(node, true, true); +} #endif -- cgit From ddc9e69b9dc23d4c0d8ed829575327746ea77a04 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 18 Jul 2017 20:34:17 +0300 Subject: ASoC: rt5677: Hide platform data in the module sources There is no user of legacy platform data. Remove separate header and hide its content inside module sources. Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/sound/rt5677.h | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/sound/rt5677.h (limited to 'include') diff --git a/include/sound/rt5677.h b/include/sound/rt5677.h deleted file mode 100644 index a6207043ac3c..000000000000 --- a/include/sound/rt5677.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/sound/rt5677.h -- Platform data for RT5677 - * - * Copyright 2013 Realtek Semiconductor Corp. - * Author: Oder Chiou - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __LINUX_SND_RT5677_H -#define __LINUX_SND_RT5677_H - -enum rt5677_dmic2_clk { - RT5677_DMIC_CLK1 = 0, - RT5677_DMIC_CLK2 = 1, -}; - - -struct rt5677_platform_data { - /* IN1/IN2/LOUT1/LOUT2/LOUT3 can optionally be differential */ - bool in1_diff; - bool in2_diff; - bool lout1_diff; - bool lout2_diff; - bool lout3_diff; - /* DMIC2 clock source selection */ - enum rt5677_dmic2_clk dmic2_clk_pin; - - /* configures GPIO, 0 - floating, 1 - pulldown, 2 - pullup */ - u8 gpio_config[6]; - - /* jd1 can select 0 ~ 3 as OFF, GPIO1, GPIO2 and GPIO3 respectively */ - unsigned int jd1_gpio; - /* jd2 and jd3 can select 0 ~ 3 as - OFF, GPIO4, GPIO5 and GPIO6 respectively */ - unsigned int jd2_gpio; - unsigned int jd3_gpio; - - /* Set MICBIAS1 VDD 1v8 or 3v3 */ - bool micbias1_vdd_3v3; -}; - -#endif -- cgit From 97bbdf02d905a0a48cb3c953ea352d7f4994643c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 25 Feb 2015 14:39:11 -0500 Subject: media: v4l: Add support for CSI-1 and CCP2 busses CCP2 and CSI-1, are older single data lane serial busses. [mchehab@s-opensource.com: don't use spaces for identation] Signed-off-by: Sakari Ailus Signed-off-by: Pavel Machek Reviewed-by: Sebastian Reichel Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 19 +++++++++++++++++++ include/media/v4l2-mediabus.h | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index ecc1233a873e..29ae22bbbbaf 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -55,6 +55,24 @@ struct v4l2_fwnode_bus_parallel { unsigned char data_shift; }; +/** + * struct v4l2_fwnode_bus_mipi_csi1 - CSI-1/CCP2 data bus structure + * @clock_inv: polarity of clock/strobe signal + * false - not inverted, true - inverted + * @strobe: false - data/clock, true - data/strobe + * @lane_polarity: the polarities of the clock (index 0) and data lanes + index (1) + * @data_lane: the number of the data lane + * @clock_lane: the number of the clock lane + */ +struct v4l2_fwnode_bus_mipi_csi1 { + bool clock_inv; + bool strobe; + bool lane_polarity[2]; + unsigned char data_lane; + unsigned char clock_lane; +}; + /** * struct v4l2_fwnode_endpoint - the endpoint data structure * @base: fwnode endpoint of the v4l2_fwnode @@ -72,6 +90,7 @@ struct v4l2_fwnode_endpoint { enum v4l2_mbus_type bus_type; union { struct v4l2_fwnode_bus_parallel parallel; + struct v4l2_fwnode_bus_mipi_csi1 mipi_csi1; struct v4l2_fwnode_bus_mipi_csi2 mipi_csi2; } bus; u64 *link_frequencies; diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 34cc99e093ef..315c167a95dc 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -69,11 +69,15 @@ * @V4L2_MBUS_PARALLEL: parallel interface with hsync and vsync * @V4L2_MBUS_BT656: parallel interface with embedded synchronisation, can * also be used for BT.1120 + * @V4L2_MBUS_CSI1: MIPI CSI-1 serial interface + * @V4L2_MBUS_CCP2: CCP2 (Compact Camera Port 2) * @V4L2_MBUS_CSI2: MIPI CSI-2 serial interface */ enum v4l2_mbus_type { V4L2_MBUS_PARALLEL, V4L2_MBUS_BT656, + V4L2_MBUS_CSI1, + V4L2_MBUS_CCP2, V4L2_MBUS_CSI2, }; -- cgit From bb4d991a28cc86a2dfbeefeff32911ca9f779c18 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 19 Jul 2017 15:41:26 -0700 Subject: tcp: adjust tail loss probe timeout This patch adjusts the timeout formula to schedule the TCP loss probe (TLP). The previous formula uses 2*SRTT or 1.5*RTT + DelayACKMax if only one packet is in flight. It keeps a lower bound of 10 msec which is too large for short RTT connections (e.g. within a data-center). The new formula = 2*RTT + (inflight == 1 ? 200ms : 2ticks) which performs better for short and fast connections. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..4f056ea79df2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) +#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ -#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) -- cgit From e7d53ad3239de636ea478fc003d3652b49b8e593 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 18 Jul 2017 16:23:56 -0400 Subject: net: dsa: unexport dsa_is_port_initialized The dsa_is_port_initialized helper is only used by dsa_switch_resume and dsa_switch_suspend, if CONFIG_PM_SLEEP is enabled. Make it static to dsa.c. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 58969b9a090c..88da272d20d0 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -256,11 +256,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p) return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p); } -static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p) -{ - return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev; -} - static inline u8 dsa_upstream_port(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; -- cgit From 7051b88a35c7dde5705923833117e14f9cc17d92 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 18 Jul 2017 15:59:27 -0700 Subject: net: make dev_close and related functions void There is no useful return value from dev_close. All paths return 0. Change dev_close and helper functions to void. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c60351b84323..614642eb7eb7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2432,8 +2432,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int dev_open(struct net_device *dev); -int dev_close(struct net_device *dev); -int dev_close_many(struct list_head *head, bool unlink); +void dev_close(struct net_device *dev); +void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); -- cgit From e0be864f14240cb1bd92247a672796239d6ef2fa Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Jul 2017 21:45:11 +0300 Subject: ARC: reset: introduce HSDKv1 reset driver The HSDK v1 periphery IPs can be reset by accessing some registers from the CGU block. The list of available reset lines is documented in the DT bindings. Signed-off-by: Eugeniy Paltsev Signed-off-by: Philipp Zabel --- include/dt-bindings/reset/snps,hsdk-v1-reset.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/dt-bindings/reset/snps,hsdk-v1-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/snps,hsdk-v1-reset.h b/include/dt-bindings/reset/snps,hsdk-v1-reset.h new file mode 100644 index 000000000000..d898c89b7123 --- /dev/null +++ b/include/dt-bindings/reset/snps,hsdk-v1-reset.h @@ -0,0 +1,17 @@ +/** + * This header provides index for the HSDK v1 reset controller. + */ +#ifndef _DT_BINDINGS_RESET_CONTROLLER_HSDK_V1 +#define _DT_BINDINGS_RESET_CONTROLLER_HSDK_V1 + +#define HSDK_V1_APB_RESET 0 +#define HSDK_V1_AXI_RESET 1 +#define HSDK_V1_ETH_RESET 2 +#define HSDK_V1_USB_RESET 3 +#define HSDK_V1_SDIO_RESET 4 +#define HSDK_V1_HDMI_RESET 5 +#define HSDK_V1_GFX_RESET 6 +#define HSDK_V1_DMAC_RESET 7 +#define HSDK_V1_EBI_RESET 8 + +#endif /*_DT_BINDINGS_RESET_CONTROLLER_HSDK_V1*/ -- cgit From f1923010b03c3c41c332dd11b93022d049da3104 Mon Sep 17 00:00:00 2001 From: Todor Tomov Date: Fri, 7 Jul 2017 04:48:47 -0400 Subject: media: v4l2-mediabus: Add helper functions Add helper functions for mbus to/from mplane pixel format conversion. Signed-off-by: Todor Tomov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-mediabus.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 315c167a95dc..93f8afcb7a22 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -117,4 +117,30 @@ static inline void v4l2_fill_mbus_format(struct v4l2_mbus_framefmt *mbus_fmt, mbus_fmt->code = code; } +static inline void v4l2_fill_pix_format_mplane( + struct v4l2_pix_format_mplane *pix_mp_fmt, + const struct v4l2_mbus_framefmt *mbus_fmt) +{ + pix_mp_fmt->width = mbus_fmt->width; + pix_mp_fmt->height = mbus_fmt->height; + pix_mp_fmt->field = mbus_fmt->field; + pix_mp_fmt->colorspace = mbus_fmt->colorspace; + pix_mp_fmt->ycbcr_enc = mbus_fmt->ycbcr_enc; + pix_mp_fmt->quantization = mbus_fmt->quantization; + pix_mp_fmt->xfer_func = mbus_fmt->xfer_func; +} + +static inline void v4l2_fill_mbus_format_mplane( + struct v4l2_mbus_framefmt *mbus_fmt, + const struct v4l2_pix_format_mplane *pix_mp_fmt) +{ + mbus_fmt->width = pix_mp_fmt->width; + mbus_fmt->height = pix_mp_fmt->height; + mbus_fmt->field = pix_mp_fmt->field; + mbus_fmt->colorspace = pix_mp_fmt->colorspace; + mbus_fmt->ycbcr_enc = pix_mp_fmt->ycbcr_enc; + mbus_fmt->quantization = pix_mp_fmt->quantization; + mbus_fmt->xfer_func = pix_mp_fmt->xfer_func; +} + #endif -- cgit From a2b426267c56773201f968fdb5eda6ab9ae94e34 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Apr 2017 14:12:15 -0500 Subject: userns,pidns: Verify the userns for new pid namespaces It is pointless and confusing to allow a pid namespace hierarchy and the user namespace hierarchy to get out of sync. The owner of a child pid namespace should be the owner of the parent pid namespace or a descendant of the owner of the parent pid namespace. Otherwise it is possible to construct scenarios where a process has a capability over a parent pid namespace but does not have the capability over a child pid namespace. Which confusingly makes permission checks non-transitive. It requires use of setns into a pid namespace (but not into a user namespace) to create such a scenario. Add the function in_userns to help in making this determination. v2: Optimized in_userns by using level as suggested by: Kirill Tkhai Ref: 49f4d8b93ccf ("pidns: Capture the user namespace and filter ns_last_pid") Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 32354b4b4b2b..4005877bb8b6 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -112,8 +112,9 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); +extern bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); - struct ns_common *ns_get_owner(struct ns_common *ns); #else @@ -144,6 +145,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) return true; } +static inline bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child) +{ + return true; +} + static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; -- cgit From 0c2021c047baa75f88996f2c732ecbac3e4fba29 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Jul 2017 15:22:51 +0800 Subject: ACPICA: IORT: Update SMMU models for revision C ACPICA commit d00a4eb86e64bb4fa70f57ab5e5ca0a4ca2ad8ef IORT revision C has been published with a number of new SMMU implementation identifiers; define them. Link: https://github.com/acpica/acpica/commit/d00a4eb8 Signed-off-by: Robin Murphy Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 707dda74c272..5829eedbf3ff 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -664,7 +664,7 @@ struct acpi_ibft_target { * IORT - IO Remapping Table * * Conforms to "IO Remapping Table System Software on ARM Platforms", - * Document number: ARM DEN 0049B, October 2015 + * Document number: ARM DEN 0049C, May 2017 * ******************************************************************************/ @@ -779,6 +779,8 @@ struct acpi_iort_smmu { #define ACPI_IORT_SMMU_V2 0x00000001 /* Generic SMMUv2 */ #define ACPI_IORT_SMMU_CORELINK_MMU400 0x00000002 /* ARM Corelink MMU-400 */ #define ACPI_IORT_SMMU_CORELINK_MMU500 0x00000003 /* ARM Corelink MMU-500 */ +#define ACPI_IORT_SMMU_CORELINK_MMU401 0x00000004 /* ARM Corelink MMU-401 */ +#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x00000005 /* Cavium thunder_x SMMUv2 */ /* Masks for Flags field above */ @@ -799,13 +801,19 @@ struct acpi_iort_smmu_v3 { u32 flags; u32 reserved; u64 vatos_address; - u32 model; /* O: generic SMMUv3 */ + u32 model; u32 event_gsiv; u32 pri_gsiv; u32 gerr_gsiv; u32 sync_gsiv; }; +/* Values for Model field above */ + +#define ACPI_IORT_SMMU_V3_GENERIC 0x00000000 /* Generic SMMUv3 */ +#define ACPI_IORT_SMMU_V3_HISILICON_HI161X 0x00000001 /* hi_silicon Hi161x SMMUv3 */ +#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x00000002 /* Cavium CN99xx SMMUv3 */ + /* Masks for Flags field above */ #define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE (1) -- cgit From 99e597adf60e1a6f83ec746db8cab225b16a838b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Jul 2017 15:23:08 +0800 Subject: Back port of "ACPICA: Use designated initializers" ACPICA commit 47538f5f0773c0820d8f552e20f6e77104290c01 The following commit is not correctly linuxized by its ACPICA form (see link #1 for reference): Commit: 3d867f6c5fd6535cdeceef3170e5e84e5dd80fc1 Subject: ACPICA: Use designated initializers Thus breaks linuxize process. This patch is a linuxized back port result of the upstreamed ACPICA commit (see link #2 for reference). Link: https://github.com/acpica/acpica/pull/248/ [#1] Link: https://github.com/acpica/acpica/commit/47538f5f [#2] Signed-off-by: Kees Cook Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acenv.h | 9 +++++---- include/acpi/platform/aclinux.h | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 912563c66948..043fd559de6e 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -288,6 +288,11 @@ #define ACPI_INLINE #endif +/* Use ordered initialization if compiler doesn't support designated. */ +#ifndef ACPI_STRUCT_INIT +#define ACPI_STRUCT_INIT(field, value) value +#endif + /* * Configurable calling conventions: * @@ -382,8 +387,4 @@ #define ACPI_INIT_FUNCTION #endif -#ifndef ACPI_STRUCT_INIT -#define ACPI_STRUCT_INIT(field, value) value -#endif - #endif /* __ACENV_H__ */ diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 047f13865608..afd95f28c8f0 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -178,6 +178,9 @@ #define ACPI_MSG_BIOS_ERROR KERN_ERR "ACPI BIOS Error (bug): " #define ACPI_MSG_BIOS_WARNING KERN_WARNING "ACPI BIOS Warning (bug): " +/* + * Linux wants to use designated initializers for function pointer structs. + */ #define ACPI_STRUCT_INIT(field, value) .field = value #else /* !__KERNEL__ */ -- cgit From 023e2ee16c51da7f6a9455ac936e4fb00295f47a Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 10 Jul 2017 15:23:45 +0800 Subject: ACPICA: Tables: Change table duplication check to be related to acpi_gbl_verify_table_checksum ACPICA commit 3d837b5d4b1033942b4d91c7d3801a09c3157918 acpi_gbl_verify_table_checksum is used to avoid validating (mapping) an entire table in OS boot stage. 2nd "Reload" check in acpi_tb_install_standard_table() is prepared for the same purpose. So this patch combines them together using a renamed acpi_gbl_enable_table_validation flag. Lv Zheng. Link: https://github.com/acpica/acpica/commit/3d837b5d Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index a59c44c3edd8..12dac84c98c9 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -160,13 +160,14 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_create_osi_method, TRUE); ACPI_INIT_GLOBAL(u8, acpi_gbl_use_default_register_widths, TRUE); /* - * Whether or not to verify the table checksum before installation. Set - * this to TRUE to verify the table checksum before install it to the table - * manager. Note that enabling this option causes errors to happen in some - * OSPMs during early initialization stages. Default behavior is to do such - * verification. - */ -ACPI_INIT_GLOBAL(u8, acpi_gbl_verify_table_checksum, TRUE); + * Whether or not to validate (map) an entire table to verify + * checksum/duplication in early stage before install. Set this to TRUE to + * allow early table validation before install it to the table manager. + * Note that enabling this option causes errors to happen in some OSPMs + * during early initialization stages. Default behavior is to allow such + * validation. + */ +ACPI_INIT_GLOBAL(u8, acpi_gbl_enable_table_validation, TRUE); /* * Optionally enable output from the AML Debug Object. -- cgit From 19df56bdf0833a8ad9dfe056931696b9e0e30463 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 10 Jul 2017 15:23:56 +0800 Subject: ACPICA: Tables: Add deferred table verification support ACPICA commit 2dd6c151d5d5e76dacba8f7db9e259fc72982d17 ACPICA commit ffddee6638aced83be18b8bc88569586c1a43e03 This patch allows tables not verified in early stage verfied in acpi_reallocate_root_table(). This is useful for OSPMs like linux where tables cannot be verified in early stage due to early ioremp limitations on some architectures. Reported by Hans de Geode, fixed by Lv Zheng. Link: https://github.com/acpica/acpica/commit/2dd6c151 Link: https://github.com/acpica/acpica/commit/ffddee66 Reported-by: Hans de Goede Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index bdc55c0da19c..89509b86cb54 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -394,6 +394,7 @@ struct acpi_table_desc { #define ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL (1) /* Physical address, internally mapped */ #define ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL (2) /* Virtual address, internallly allocated */ #define ACPI_TABLE_ORIGIN_MASK (3) +#define ACPI_TABLE_IS_VERIFIED (4) #define ACPI_TABLE_IS_LOADED (8) /* -- cgit From c944230064eb65e4fa018d86779b4fd200b1d7e7 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Mon, 10 Jul 2017 15:24:15 +0800 Subject: ACPICA: iasl: Update to IORT SMMUv3 disassembling ACPICA commit 8cadc4fb500e2aa52241e367c87a0f95d9760c58 ARM IORT specification has provision to define Proximity domain in SMMUv3 IORT table. Adding required changes to decode Proximity domain of SMMUv3 IORT table. Link: https://github.com/acpica/acpica/commit/8cadc4fb Signed-off-by: Ganapatrao Kulkarni Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 5829eedbf3ff..d568948d3a82 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -806,6 +806,9 @@ struct acpi_iort_smmu_v3 { u32 pri_gsiv; u32 gerr_gsiv; u32 sync_gsiv; + u8 pxm; + u8 reserved1; + u16 reserved2; }; /* Values for Model field above */ @@ -818,6 +821,7 @@ struct acpi_iort_smmu_v3 { #define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE (1) #define ACPI_IORT_SMMU_V3_HTTU_OVERRIDE (1<<1) +#define ACPI_IORT_SMMU_V3_PXM_VALID (1<<3) /******************************************************************************* * -- cgit From ab539eaa50c6a97f1d6287c5ce3ee75155cb10b8 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 10 Jul 2017 15:24:27 +0800 Subject: ACPICA: Update version to 20170629 ACPICA commit 7271c1c54c095c06ed9e7d28641f2356da840038 Version 20170629 Link: https://github.com/acpica/acpica/commit/7271c1c5 Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 12dac84c98c9..06b87adc97ef 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20170531 +#define ACPI_CA_VERSION 0x20170629 #include #include -- cgit From d75cf0144f150272be806b69b4e62553ba07ea1b Mon Sep 17 00:00:00 2001 From: Prabhakar Lad Date: Thu, 20 Jul 2017 04:56:31 -0400 Subject: media: platform: davinci: drop VPFE_CMD_S_CCDC_RAW_PARAMS drop VPFE_CMD_S_CCDC_RAW_PARAMS ioctl from dm355/dm644x following reasons: - This ioctl was never in public api and was only defined in kernel header. - The function set_params constantly mixes up pointers and phys_addr_t numbers. - This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is described as an 'experimental ioctl that will change in future kernels'. - The code to allocate the table never gets called after we copy_from_user the user input over the kernel settings, and then compare them for inequality. - We then go on to use an address provided by user space as both the __user pointer for input and pass it through phys_to_virt to come up with a kernel pointer to copy the data to. This looks like a trivially exploitable root hole. Signed-off-by: Lad, Prabhakar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/davinci/dm644x_ccdc.h | 12 ------------ include/media/davinci/vpfe_capture.h | 10 ---------- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/media/davinci/dm644x_ccdc.h b/include/media/davinci/dm644x_ccdc.h index 7c909da29d43..6ea2ce241851 100644 --- a/include/media/davinci/dm644x_ccdc.h +++ b/include/media/davinci/dm644x_ccdc.h @@ -103,16 +103,6 @@ struct ccdc_black_compensation { char gb; }; -/* structure for fault pixel correction */ -struct ccdc_fault_pixel { - /* Enable or Disable fault pixel correction */ - unsigned char enable; - /* Number of fault pixel */ - unsigned short fp_num; - /* Address of fault pixel table */ - unsigned long fpc_table_addr; -}; - /* Structure for CCDC configuration parameters for raw capture mode passed * by application */ @@ -125,8 +115,6 @@ struct ccdc_config_params_raw { struct ccdc_black_clamp blk_clamp; /* Structure for Black Compensation */ struct ccdc_black_compensation blk_comp; - /* Structure for Fault Pixel Module Configuration */ - struct ccdc_fault_pixel fault_pxl; }; diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h index 8e1a4d88daa0..f003533602d0 100644 --- a/include/media/davinci/vpfe_capture.h +++ b/include/media/davinci/vpfe_capture.h @@ -183,14 +183,4 @@ struct vpfe_config_params { }; #endif /* End of __KERNEL__ */ -/** - * VPFE_CMD_S_CCDC_RAW_PARAMS - EXPERIMENTAL IOCTL to set raw capture params - * This can be used to configure modules such as defect pixel correction, - * color space conversion, culling etc. This is an experimental ioctl that - * will change in future kernels. So use this ioctl with care ! - * TODO: This is to be split into multiple ioctls and also explore the - * possibility of extending the v4l2 api to include this - **/ -#define VPFE_CMD_S_CCDC_RAW_PARAMS _IOW('V', BASE_VIDIOC_PRIVATE + 1, \ - void *) #endif /* _DAVINCI_VPFE_H */ -- cgit From 4ee236219f6da367093b503b52fcceb609397c6f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 26 Jun 2017 14:07:54 -0400 Subject: media: v4l2-fwnode: suppress a warning at OF parsing logic smatch produce this warning: drivers/media/v4l2-core/v4l2-fwnode.c:76 v4l2_fwnode_endpoint_parse_csi_bus() error: buffer overflow 'array' 5 <= u16max That's because, in thesis, the routine might have called with some value at bus->num_data_lanes. That's not the current case. Yet, better to shut up this warning, and make the code more reliable if some future changes might cause a bug. While here, simplify the code a little bit by reading only once from lanes-properties array. Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index 29ae22bbbbaf..b373c43f65e8 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -26,6 +26,8 @@ struct fwnode_handle; +#define MAX_DATA_LANES 4 + /** * struct v4l2_fwnode_bus_mipi_csi2 - MIPI CSI-2 bus data structure * @flags: media bus (V4L2_MBUS_*) flags @@ -37,10 +39,10 @@ struct fwnode_handle; */ struct v4l2_fwnode_bus_mipi_csi2 { unsigned int flags; - unsigned char data_lanes[4]; + unsigned char data_lanes[MAX_DATA_LANES]; unsigned char clock_lane; unsigned short num_data_lanes; - bool lane_polarities[5]; + bool lane_polarities[MAX_DATA_LANES + 1]; }; /** -- cgit From 8bf3560f6c66832baaae6eba43da552550ed82c9 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Fri, 14 Jul 2017 15:02:41 +0800 Subject: dt-bindings: power: add RK3366 SoCs header for power-domain According to a description from TRM, add all the power domains. Signed-off-by: Elaine Zhang Signed-off-by: Heiko Stuebner --- include/dt-bindings/power/rk3366-power.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/dt-bindings/power/rk3366-power.h (limited to 'include') diff --git a/include/dt-bindings/power/rk3366-power.h b/include/dt-bindings/power/rk3366-power.h new file mode 100644 index 000000000000..223a3dce049a --- /dev/null +++ b/include/dt-bindings/power/rk3366-power.h @@ -0,0 +1,24 @@ +#ifndef __DT_BINDINGS_POWER_RK3366_POWER_H__ +#define __DT_BINDINGS_POWER_RK3366_POWER_H__ + +/* VD_CORE */ +#define RK3366_PD_A53_0 0 +#define RK3366_PD_A53_1 1 +#define RK3366_PD_A53_2 2 +#define RK3366_PD_A53_3 3 + +/* VD_LOGIC */ +#define RK3366_PD_BUS 4 +#define RK3366_PD_PERI 5 +#define RK3366_PD_VIO 6 +#define RK3366_PD_VIDEO 7 +#define RK3366_PD_RKVDEC 8 +#define RK3366_PD_WIFIBT 9 +#define RK3366_PD_VPU 10 +#define RK3366_PD_GPU 11 +#define RK3366_PD_ALIVE 12 + +/* VD_PMU */ +#define RK3366_PD_PMU 13 + +#endif -- cgit From 6538b02d210f52ef2a2e67d59fcb58be98451fbd Mon Sep 17 00:00:00 2001 From: Todor Tomov Date: Mon, 3 Jul 2017 08:08:11 -0400 Subject: media: Make parameter of media_entity_remote_pad() const The local pad parameter in media_entity_remote_pad() is not modified. Make that explicit by adding a const modifier. Signed-off-by: Todor Tomov Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/media-entity.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 754182d29668..222d379960b7 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -805,7 +805,7 @@ struct media_link *media_entity_find_link(struct media_pad *source, * Return: returns a pointer to the pad at the remote end of the first found * enabled link, or %NULL if no enabled link has been found. */ -struct media_pad *media_entity_remote_pad(struct media_pad *pad); +struct media_pad *media_entity_remote_pad(const struct media_pad *pad); /** * media_entity_get - Get a reference to the parent module -- cgit From 727f8914477e4642c7d1ff381667cdc4178b40c6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Jul 2017 10:39:26 +0100 Subject: rxrpc: Expose UAPI definitions to userspace Move UAPI definitions from the internal header and place them in a UAPI header file so that userspace can make use of them. Signed-off-by: David Howells --- include/linux/rxrpc.h | 79 --------------------------------------------- include/uapi/linux/rxrpc.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 79 deletions(-) delete mode 100644 include/linux/rxrpc.h create mode 100644 include/uapi/linux/rxrpc.h (limited to 'include') diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h deleted file mode 100644 index 7343f71783dc..000000000000 --- a/include/linux/rxrpc.h +++ /dev/null @@ -1,79 +0,0 @@ -/* AF_RXRPC parameters - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_RXRPC_H -#define _LINUX_RXRPC_H - -#include -#include - -/* - * RxRPC socket address - */ -struct sockaddr_rxrpc { - sa_family_t srx_family; /* address family */ - u16 srx_service; /* service desired */ - u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ - u16 transport_len; /* length of transport address */ - union { - sa_family_t family; /* transport address family */ - struct sockaddr_in sin; /* IPv4 transport address */ - struct sockaddr_in6 sin6; /* IPv6 transport address */ - } transport; -}; - -/* - * RxRPC socket options - */ -#define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ -#define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ -#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ -#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ -#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ -#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ - -/* - * RxRPC control messages - * - If neither abort or accept are specified, the message is a data message. - * - terminal messages mean that a user call ID tag can be recycled - * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() - */ -enum rxrpc_cmsg_type { - RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ - RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ - RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ - RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ - RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ - RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ - RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ - RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ - RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ - RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ - RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ - RXRPC__SUPPORTED -}; - -/* - * RxRPC security levels - */ -#define RXRPC_SECURITY_PLAIN 0 /* plain secure-checksummed packets only */ -#define RXRPC_SECURITY_AUTH 1 /* authenticated packets */ -#define RXRPC_SECURITY_ENCRYPT 2 /* encrypted packets */ - -/* - * RxRPC security indices - */ -#define RXRPC_SECURITY_NONE 0 /* no security protocol */ -#define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ -#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ -#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ - -#endif /* _LINUX_RXRPC_H */ diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h new file mode 100644 index 000000000000..08e2fb9c70ae --- /dev/null +++ b/include/uapi/linux/rxrpc.h @@ -0,0 +1,80 @@ +/* Types and definitions for AF_RXRPC. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_RXRPC_H +#define _UAPI_LINUX_RXRPC_H + +#include +#include +#include + +/* + * RxRPC socket address + */ +struct sockaddr_rxrpc { + sa_family_t srx_family; /* address family */ + u16 srx_service; /* service desired */ + u16 transport_type; /* type of transport socket (SOCK_DGRAM) */ + u16 transport_len; /* length of transport address */ + union { + sa_family_t family; /* transport address family */ + struct sockaddr_in sin; /* IPv4 transport address */ + struct sockaddr_in6 sin6; /* IPv6 transport address */ + } transport; +}; + +/* + * RxRPC socket options + */ +#define RXRPC_SECURITY_KEY 1 /* [clnt] set client security key */ +#define RXRPC_SECURITY_KEYRING 2 /* [srvr] set ring of server security keys */ +#define RXRPC_EXCLUSIVE_CONNECTION 3 /* Deprecated; use RXRPC_EXCLUSIVE_CALL instead */ +#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */ +#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */ +#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */ + +/* + * RxRPC control messages + * - If neither abort or accept are specified, the message is a data message. + * - terminal messages mean that a user call ID tag can be recycled + * - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg() + */ +enum rxrpc_cmsg_type { + RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */ + RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */ + RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */ + RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */ + RXRPC_BUSY = 6, /* -r: server busy received [terminal] */ + RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */ + RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */ + RXRPC_ACCEPT = 9, /* s-: [Service] accept request */ + RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */ + RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */ + RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */ + RXRPC__SUPPORTED +}; + +/* + * RxRPC security levels + */ +#define RXRPC_SECURITY_PLAIN 0 /* plain secure-checksummed packets only */ +#define RXRPC_SECURITY_AUTH 1 /* authenticated packets */ +#define RXRPC_SECURITY_ENCRYPT 2 /* encrypted packets */ + +/* + * RxRPC security indices + */ +#define RXRPC_SECURITY_NONE 0 /* no security protocol */ +#define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */ +#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ +#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ + +#endif /* _UAPI_LINUX_RXRPC_H */ -- cgit From ddc6c70f07bb1f6dd39a2c6c430f7b4fa95199c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 21 Jul 2017 10:07:10 +0100 Subject: rxrpc: Move the packet.h include file into net/rxrpc/ Move the protocol description header file into net/rxrpc/ and rename it to protocol.h. It's no longer necessary to expose it as packets are no longer exposed to kernel services (such as AFS) that use the facility. The abort codes are transferred to the UAPI header instead as we pass these back to userspace and also to kernel services. Signed-off-by: David Howells --- include/rxrpc/packet.h | 235 --------------------------------------------- include/uapi/linux/rxrpc.h | 44 +++++++++ 2 files changed, 44 insertions(+), 235 deletions(-) delete mode 100644 include/rxrpc/packet.h (limited to 'include') diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h deleted file mode 100644 index a2dcfb850b9f..000000000000 --- a/include/rxrpc/packet.h +++ /dev/null @@ -1,235 +0,0 @@ -/* packet.h: Rx packet layout and definitions - * - * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_RXRPC_PACKET_H -#define _LINUX_RXRPC_PACKET_H - -typedef u32 rxrpc_seq_t; /* Rx message sequence number */ -typedef u32 rxrpc_serial_t; /* Rx message serial number */ -typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */ -typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ - -/*****************************************************************************/ -/* - * on-the-wire Rx packet header - * - all multibyte fields should be in network byte order - */ -struct rxrpc_wire_header { - __be32 epoch; /* client boot timestamp */ -#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ - - __be32 cid; /* connection and channel ID */ -#define RXRPC_MAXCALLS 4 /* max active calls per conn */ -#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ -#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ -#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */ -#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ - - __be32 callNumber; /* call ID (0 for connection-level packets) */ - __be32 seq; /* sequence number of pkt in call stream */ - __be32 serial; /* serial number of pkt sent to network */ - - uint8_t type; /* packet type */ -#define RXRPC_PACKET_TYPE_DATA 1 /* data */ -#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */ -#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */ -#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */ -#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */ -#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */ -#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */ -#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */ -#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ - - uint8_t flags; /* packet flags */ -#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ -#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */ -#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */ -#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */ -#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */ -#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */ - - uint8_t userStatus; /* app-layer defined status */ -#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ - - uint8_t securityIndex; /* security protocol ID */ - union { - __be16 _rsvd; /* reserved */ - __be16 cksum; /* kerberos security checksum */ - }; - __be16 serviceId; /* service ID */ - -} __packed; - -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - -/*****************************************************************************/ -/* - * jumbo packet secondary header - * - can be mapped to read header by: - * - new_serial = serial + 1 - * - new_seq = seq + 1 - * - new_flags = j_flags - * - new__rsvd = j__rsvd - * - duplicating all other fields - */ -struct rxrpc_jumbo_header { - uint8_t flags; /* packet flags (as per rxrpc_header) */ - uint8_t pad; - union { - __be16 _rsvd; /* reserved */ - __be16 cksum; /* kerberos security checksum */ - }; -}; - -#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ -#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) - -/*****************************************************************************/ -/* - * on-the-wire Rx ACK packet data payload - * - all multibyte fields should be in network byte order - */ -struct rxrpc_ackpacket { - __be16 bufferSpace; /* number of packet buffers available */ - __be16 maxSkew; /* diff between serno being ACK'd and highest serial no - * received */ - __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */ - __be32 previousPacket; /* sequence no of previous packet received */ - __be32 serial; /* serial no of packet that prompted this ACK */ - - uint8_t reason; /* reason for ACK */ -#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */ -#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */ -#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */ -#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */ -#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */ -#define RXRPC_ACK_PING 6 /* keep alive ACK */ -#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ -#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ -#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ -#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ - - uint8_t nAcks; /* number of ACKs */ -#define RXRPC_MAXACKS 255 - - uint8_t acks[0]; /* list of ACK/NAKs */ -#define RXRPC_ACK_TYPE_NACK 0 -#define RXRPC_ACK_TYPE_ACK 1 - -} __packed; - -/* Some ACKs refer to specific packets and some are general and can be updated. */ -#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ - (1 << RXRPC_ACK_PING_RESPONSE) | \ - (1 << RXRPC_ACK_DELAY) | \ - (1 << RXRPC_ACK_IDLE)) - - -/* - * ACK packets can have a further piece of information tagged on the end - */ -struct rxrpc_ackinfo { - __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ - __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ - __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ - __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ -}; - -/*****************************************************************************/ -/* - * Kerberos security type-2 challenge packet - */ -struct rxkad_challenge { - __be32 version; /* version of this challenge type */ - __be32 nonce; /* encrypted random number */ - __be32 min_level; /* minimum security level */ - __be32 __padding; /* padding to 8-byte boundary */ -} __packed; - -/*****************************************************************************/ -/* - * Kerberos security type-2 response packet - */ -struct rxkad_response { - __be32 version; /* version of this response type */ - __be32 __pad; - - /* encrypted bit of the response */ - struct { - __be32 epoch; /* current epoch */ - __be32 cid; /* parent connection ID */ - __be32 checksum; /* checksum */ - __be32 securityIndex; /* security type */ - __be32 call_id[4]; /* encrypted call IDs */ - __be32 inc_nonce; /* challenge nonce + 1 */ - __be32 level; /* desired level */ - } encrypted; - - __be32 kvno; /* Kerberos key version number */ - __be32 ticket_len; /* Kerberos ticket length */ -} __packed; - -/*****************************************************************************/ -/* - * RxRPC-level abort codes - */ -#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ -#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ -#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ -#define RX_EOF -4 /* unexpected end of data on read op */ -#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ -#define RX_USER_ABORT -6 /* generic user abort */ -#define RX_ADDRINUSE -7 /* UDP port in use */ -#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ - -/* - * (un)marshalling abort codes (rxgen) - */ -#define RXGEN_CC_MARSHAL -450 -#define RXGEN_CC_UNMARSHAL -451 -#define RXGEN_SS_MARSHAL -452 -#define RXGEN_SS_UNMARSHAL -453 -#define RXGEN_DECODE -454 -#define RXGEN_OPCODE -455 -#define RXGEN_SS_XDRFREE -456 -#define RXGEN_CC_XDRFREE -457 - -/* - * Rx kerberos security abort codes - * - unfortunately we have no generalised security abort codes to say things - * like "unsupported security", so we have to use these instead and hope the - * other side understands - */ -#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ -#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ -#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ -#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ -#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ -#define RXKADNOAUTH 19270405 /* caller not authorised */ -#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ -#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ -#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ -#define RXKADEXPIRED 19270409 /* authentication expired */ -#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ -#define RXKADDATALEN 19270411 /* user data too long */ -#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ - -#endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h index 08e2fb9c70ae..9656aad8f8f7 100644 --- a/include/uapi/linux/rxrpc.h +++ b/include/uapi/linux/rxrpc.h @@ -77,4 +77,48 @@ enum rxrpc_cmsg_type { #define RXRPC_SECURITY_RXGK 4 /* gssapi-based */ #define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */ +/* + * RxRPC-level abort codes + */ +#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */ +#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */ +#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */ +#define RX_EOF -4 /* unexpected end of data on read op */ +#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */ +#define RX_USER_ABORT -6 /* generic user abort */ +#define RX_ADDRINUSE -7 /* UDP port in use */ +#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */ + +/* + * (un)marshalling abort codes (rxgen) + */ +#define RXGEN_CC_MARSHAL -450 +#define RXGEN_CC_UNMARSHAL -451 +#define RXGEN_SS_MARSHAL -452 +#define RXGEN_SS_UNMARSHAL -453 +#define RXGEN_DECODE -454 +#define RXGEN_OPCODE -455 +#define RXGEN_SS_XDRFREE -456 +#define RXGEN_CC_XDRFREE -457 + +/* + * Rx kerberos security abort codes + * - unfortunately we have no generalised security abort codes to say things + * like "unsupported security", so we have to use these instead and hope the + * other side understands + */ +#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */ +#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */ +#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */ +#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */ +#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */ +#define RXKADNOAUTH 19270405 /* caller not authorised */ +#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */ +#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */ +#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */ +#define RXKADEXPIRED 19270409 /* authentication expired */ +#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */ +#define RXKADDATALEN 19270411 /* user data too long */ +#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */ + #endif /* _UAPI_LINUX_RXRPC_H */ -- cgit From 8382a11b2782465a0edec5c8b99b93087ad1f66c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 21 Jul 2017 08:03:24 -0400 Subject: media: v4l2-fwnode: fix a Sphinx warning The kernel-doc tag is wrong there, causing this warning: ./include/media/v4l2-fwnode.h:66: warning: bad line: index (1) Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index b373c43f65e8..cb34dcb0bb65 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -63,7 +63,7 @@ struct v4l2_fwnode_bus_parallel { * false - not inverted, true - inverted * @strobe: false - data/clock, true - data/strobe * @lane_polarity: the polarities of the clock (index 0) and data lanes - index (1) + * index (1) * @data_lane: the number of the data lane * @clock_lane: the number of the clock lane */ -- cgit From bc2fb7ed089ffd16d26e1d95b898a37d2b37d201 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 15 May 2017 09:34:01 -0400 Subject: cgroup: add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS css_task_iter currently always walks all tasks. With the scheduled cgroup v2 thread support, the iterator would need to handle multiple types of iteration. As a preparation, add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS. If the flag is not specified, it walks all tasks as before. When asserted, the iterator only walks the group leaders. For now, the only user of the flag is cgroup v2 "cgroup.procs" file which no longer needs to skip non-leader tasks in cgroup_procs_next(). Note that cgroup v1 "cgroup.procs" can't use the group leader walk as v1 "cgroup.procs" doesn't mean "list all thread group leaders in the cgroup" but "list all thread group id's with any threads in the cgroup". While at it, update cgroup_procs_show() to use task_pid_vnr() instead of task_tgid_vnr(). As the iteration guarantees that the function only sees group leaders, this doesn't change the output and will allow sharing the function for thread iteration. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 308b10797a54..cae5831ae650 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -36,9 +36,13 @@ #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 +/* walk only threadgroup leaders */ +#define CSS_TASK_ITER_PROCS (1U << 0) + /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; + unsigned int flags; struct list_head *cset_pos; struct list_head *cset_head; @@ -129,7 +133,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); -void css_task_iter_start(struct cgroup_subsys_state *css, +void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it); struct task_struct *css_task_iter_next(struct css_task_iter *it); void css_task_iter_end(struct css_task_iter *it); -- cgit From 454000adaa2a7420df6e56a42f22726d05872a3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 15 May 2017 09:34:02 -0400 Subject: cgroup: introduce cgroup->dom_cgrp and threaded css_set handling cgroup v2 is in the process of growing thread granularity support. A threaded subtree is composed of a thread root and threaded cgroups which are proper members of the subtree. The root cgroup of the subtree serves as the domain cgroup to which the processes (as opposed to threads / tasks) of the subtree conceptually belong and domain-level resource consumptions not tied to any specific task are charged. Inside the subtree, threads won't be subject to process granularity or no-internal-task constraint and can be distributed arbitrarily across the subtree. This patch introduces cgroup->dom_cgrp along with threaded css_set handling. * cgroup->dom_cgrp points to self for normal and thread roots. For proper thread subtree members, points to the dom_cgrp (the thread root). * css_set->dom_cset points to self if for normal and thread roots. If threaded, points to the css_set which belongs to the cgrp->dom_cgrp. The dom_cgrp serves as the resource domain and keeps the matching csses available. The dom_cset holds those csses and makes them easily accessible. * All threaded csets are linked on their dom_csets to enable iteration of all threaded tasks. * cgroup->nr_threaded_children keeps track of the number of threaded children. This patch adds the above but doesn't actually use them yet. The following patches will build on top. v4: ->nr_threaded_children added. v3: ->proc_cgrp/cset renamed to ->dom_cgrp/cset. Updated for the new enable-threaded-per-cgroup behavior. v2: Added cgroup_is_threaded() helper. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 33 +++++++++++++++++++++++++++++---- include/linux/cgroup.h | 3 ++- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae7bc1e70085..651c4363c85e 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -172,6 +172,14 @@ struct css_set { /* reference count */ refcount_t refcount; + /* + * For a domain cgroup, the following points to self. If threaded, + * to the matching cset of the nearest domain ancestor. The + * dom_cset provides access to the domain cgroup and its csses to + * which domain level resource consumptions should be charged. + */ + struct css_set *dom_cset; + /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; @@ -200,6 +208,10 @@ struct css_set { */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* all threaded csets whose ->dom_cset points to this cset */ + struct list_head threaded_csets; + struct list_head threaded_csets_node; + /* * List running through all cgroup groups in the same hash * slot. Protected by css_set_lock @@ -267,12 +279,16 @@ struct cgroup { * doesn't have any tasks. * * All children which have non-zero nr_populated_csets and/or - * nr_populated_children of their own contribute one to - * nr_populated_children. The counter is zero iff this cgroup's - * subtree proper doesn't have any tasks. + * nr_populated_children of their own contribute one to either + * nr_populated_domain_children or nr_populated_threaded_children + * depending on their type. Each counter is zero iff all cgroups + * of the type in the subtree proper don't have any tasks. */ int nr_populated_csets; - int nr_populated_children; + int nr_populated_domain_children; + int nr_populated_threaded_children; + + int nr_threaded_children; /* # of live threaded child cgroups */ struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ @@ -310,6 +326,15 @@ struct cgroup { */ struct list_head e_csets[CGROUP_SUBSYS_COUNT]; + /* + * If !threaded, self. If threaded, it points to the nearest + * domain ancestor. Inside a threaded subtree, cgroups are exempt + * from process granularity and no-internal-task constraint. + * Domain level resource consumptions which aren't tied to a + * specific task are charged to the dom_cgrp. + */ + struct cgroup *dom_cgrp; + /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index cae5831ae650..b7dd23040cd5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -541,7 +541,8 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return cgrp->nr_populated_csets + cgrp->nr_populated_children; + return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children + + cgrp->nr_populated_threaded_children; } /* returns ino associated with a cgroup */ -- cgit From 450ee0c1feed657894e0b4bdd48f3974af9d394c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 15 May 2017 09:34:03 -0400 Subject: cgroup: implement CSS_TASK_ITER_THREADED cgroup v2 is in the process of growing thread granularity support. Once thread mode is enabled, the root cgroup of the subtree serves as the dom_cgrp to which the processes of the subtree conceptually belong and domain-level resource consumptions not tied to any specific task are charged. In the subtree, threads won't be subject to process granularity or no-internal-task constraint and can be distributed arbitrarily across the subtree. This patch implements a new task iterator flag CSS_TASK_ITER_THREADED, which, when used on a dom_cgrp, makes the iteration include the tasks on all the associated threaded css_sets. "cgroup.procs" read path is updated to use it so that reading the file on a proc_cgrp lists all processes. This will also be used by controller implementations which need to walk processes or tasks at the resource domain level. Task iteration is implemented nested in css_set iteration. If CSS_TASK_ITER_THREADED is specified, after walking tasks of each !threaded css_set, all the associated threaded css_sets are visited before moving onto the next !threaded css_set. v2: ->cur_pcset renamed to ->cur_dcset. Updated for the new enable-threaded-per-cgroup behavior. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b7dd23040cd5..79faa6467f76 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -38,6 +38,8 @@ /* walk only threadgroup leaders */ #define CSS_TASK_ITER_PROCS (1U << 0) +/* walk all threaded css_sets in the domain */ +#define CSS_TASK_ITER_THREADED (1U << 1) /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { @@ -47,11 +49,15 @@ struct css_task_iter { struct list_head *cset_pos; struct list_head *cset_head; + struct list_head *tcset_pos; + struct list_head *tcset_head; + struct list_head *task_pos; struct list_head *tasks_head; struct list_head *mg_tasks_head; struct css_set *cur_cset; + struct css_set *cur_dcset; struct task_struct *cur_task; struct list_head iters_node; /* css_set->task_iters */ }; -- cgit From 8cfd8147df67e741d93b8783a3ea8f3c74f93a0e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 21 Jul 2017 11:14:51 -0400 Subject: cgroup: implement cgroup v2 thread support This patch implements cgroup v2 thread support. The goal of the thread mode is supporting hierarchical accounting and control at thread granularity while staying inside the resource domain model which allows coordination across different resource controllers and handling of anonymous resource consumptions. A cgroup is always created as a domain and can be made threaded by writing to the "cgroup.type" file. When a cgroup becomes threaded, it becomes a member of a threaded subtree which is anchored at the closest ancestor which isn't threaded. The threads of the processes which are in a threaded subtree can be placed anywhere without being restricted by process granularity or no-internal-process constraint. Note that the threads aren't allowed to escape to a different threaded subtree. To be used inside a threaded subtree, a controller should explicitly support threaded mode and be able to handle internal competition in the way which is appropriate for the resource. The root of a threaded subtree, the nearest ancestor which isn't threaded, is called the threaded domain and serves as the resource domain for the whole subtree. This is the last cgroup where domain controllers are operational and where all the domain-level resource consumptions in the subtree are accounted. This allows threaded controllers to operate at thread granularity when requested while staying inside the scope of system-level resource distribution. As the root cgroup is exempt from the no-internal-process constraint, it can serve as both a threaded domain and a parent to normal cgroups, so, unlike non-root cgroups, the root cgroup can have both domain and threaded children. Internally, in a threaded subtree, each css_set has its ->dom_cset pointing to a matching css_set which belongs to the threaded domain. This ensures that thread root level cgroup_subsys_state for all threaded controllers are readily accessible for domain-level operations. This patch enables threaded mode for the pids and perf_events controllers. Neither has to worry about domain-level resource consumptions and it's enough to simply set the flag. For more details on the interface and behavior of the thread mode, please refer to the section 2-2-2 in Documentation/cgroup-v2.txt added by this patch. v5: - Dropped silly no-op ->dom_cgrp init from cgroup_create(). Spotted by Waiman. - Documentation updated as suggested by Waiman. - cgroup.type content slightly reformatted. - Mark the debug controller threaded. v4: - Updated to the general idea of marking specific cgroups domain/threaded as suggested by PeterZ. v3: - Dropped "join" and always make mixed children join the parent's threaded subtree. v2: - After discussions with Waiman, support for mixed thread mode is added. This should address the issue that Peter pointed out where any nesting should be avoided for thread subtrees while coexisting with other domain cgroups. - Enabling / disabling thread mode now piggy backs on the existing control mask update mechanism. - Bug fixes and cleanup. Signed-off-by: Tejun Heo Cc: Waiman Long Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 651c4363c85e..9d741959f218 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -521,6 +521,18 @@ struct cgroup_subsys { */ bool implicit_on_dfl:1; + /* + * If %true, the controller, supports threaded mode on the default + * hierarchy. In a threaded subtree, both process granularity and + * no-internal-process constraint are ignored and a threaded + * controllers should be able to handle that. + * + * Note that as an implicit controller is automatically enabled on + * all cgroups on the default hierarchy, it should also be + * threaded. implicit && !threaded is not supported. + */ + bool threaded:1; + /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent -- cgit From d16645054d2f55e4011c9725ddf2dbe6177e942a Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 21 Jul 2017 09:38:36 -0700 Subject: dt-bindings: Drop k2g genpd device ID macros Commit 7cc119f29b19 ("dt-bindings: Add TI SCI PM Domains") introduced a number of K2G_DEV_x macros to represent each device ID available on the K2G platform for use by the genpd, clock, and reset drivers. Rather than use these macros, which are only used in the device tree for property values and not actually used by the drivers, let's just use the device ID number directly in the device tree to avoid macro bloat. Acked-by: Rob Herring Signed-off-by: Dave Gerlach Signed-off-by: Santosh Shilimkar --- include/dt-bindings/genpd/k2g.h | 90 ----------------------------------------- 1 file changed, 90 deletions(-) delete mode 100644 include/dt-bindings/genpd/k2g.h (limited to 'include') diff --git a/include/dt-bindings/genpd/k2g.h b/include/dt-bindings/genpd/k2g.h deleted file mode 100644 index 1f31f17e19eb..000000000000 --- a/include/dt-bindings/genpd/k2g.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * TI K2G SoC Device definitions - * - * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef _DT_BINDINGS_GENPD_K2G_H -#define _DT_BINDINGS_GENPD_K2G_H - -/* Documented in http://processors.wiki.ti.com/index.php/TISCI */ - -#define K2G_DEV_PMMC0 0x0000 -#define K2G_DEV_MLB0 0x0001 -#define K2G_DEV_DSS0 0x0002 -#define K2G_DEV_MCBSP0 0x0003 -#define K2G_DEV_MCASP0 0x0004 -#define K2G_DEV_MCASP1 0x0005 -#define K2G_DEV_MCASP2 0x0006 -#define K2G_DEV_DCAN0 0x0008 -#define K2G_DEV_DCAN1 0x0009 -#define K2G_DEV_EMIF0 0x000a -#define K2G_DEV_MMCHS0 0x000b -#define K2G_DEV_MMCHS1 0x000c -#define K2G_DEV_GPMC0 0x000d -#define K2G_DEV_ELM0 0x000e -#define K2G_DEV_SPI0 0x0010 -#define K2G_DEV_SPI1 0x0011 -#define K2G_DEV_SPI2 0x0012 -#define K2G_DEV_SPI3 0x0013 -#define K2G_DEV_ICSS0 0x0014 -#define K2G_DEV_ICSS1 0x0015 -#define K2G_DEV_USB0 0x0016 -#define K2G_DEV_USB1 0x0017 -#define K2G_DEV_NSS0 0x0018 -#define K2G_DEV_PCIE0 0x0019 -#define K2G_DEV_GPIO0 0x001b -#define K2G_DEV_GPIO1 0x001c -#define K2G_DEV_TIMER64_0 0x001d -#define K2G_DEV_TIMER64_1 0x001e -#define K2G_DEV_TIMER64_2 0x001f -#define K2G_DEV_TIMER64_3 0x0020 -#define K2G_DEV_TIMER64_4 0x0021 -#define K2G_DEV_TIMER64_5 0x0022 -#define K2G_DEV_TIMER64_6 0x0023 -#define K2G_DEV_MSGMGR0 0x0025 -#define K2G_DEV_BOOTCFG0 0x0026 -#define K2G_DEV_ARM_BOOTROM0 0x0027 -#define K2G_DEV_DSP_BOOTROM0 0x0029 -#define K2G_DEV_DEBUGSS0 0x002b -#define K2G_DEV_UART0 0x002c -#define K2G_DEV_UART1 0x002d -#define K2G_DEV_UART2 0x002e -#define K2G_DEV_EHRPWM0 0x002f -#define K2G_DEV_EHRPWM1 0x0030 -#define K2G_DEV_EHRPWM2 0x0031 -#define K2G_DEV_EHRPWM3 0x0032 -#define K2G_DEV_EHRPWM4 0x0033 -#define K2G_DEV_EHRPWM5 0x0034 -#define K2G_DEV_EQEP0 0x0035 -#define K2G_DEV_EQEP1 0x0036 -#define K2G_DEV_EQEP2 0x0037 -#define K2G_DEV_ECAP0 0x0038 -#define K2G_DEV_ECAP1 0x0039 -#define K2G_DEV_I2C0 0x003a -#define K2G_DEV_I2C1 0x003b -#define K2G_DEV_I2C2 0x003c -#define K2G_DEV_EDMA0 0x003f -#define K2G_DEV_SEMAPHORE0 0x0040 -#define K2G_DEV_INTC0 0x0041 -#define K2G_DEV_GIC0 0x0042 -#define K2G_DEV_QSPI0 0x0043 -#define K2G_DEV_ARM_64B_COUNTER0 0x0044 -#define K2G_DEV_TETRIS0 0x0045 -#define K2G_DEV_CGEM0 0x0046 -#define K2G_DEV_MSMC0 0x0047 -#define K2G_DEV_CBASS0 0x0049 -#define K2G_DEV_BOARD0 0x004c -#define K2G_DEV_EDMA1 0x004f - -#endif -- cgit From e8e3edb95ce6a146bc774b6cfad3553f4383edc8 Mon Sep 17 00:00:00 2001 From: Mario Rugiero Date: Mon, 29 May 2017 08:38:41 -0300 Subject: mtd: create per-device and module-scope debugfs entries Several MTD devices are using debugfs entries created in the root. This commit provides the means for a standardized subtree, creating one "mtd" entry at root, and one entry per device inside it, named after the device. The tree is registered in add_mtd_device, and released in del_mtd_device. Devices docg3, mtdswap and nandsim were updated to use this subtree instead of custom ones, and their entries were prefixed with the drivers' names. Signed-off-by: Mario J. Rugiero Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/mtd.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index f8a2ef239c60..6cd0f6b7658b 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -206,6 +206,15 @@ struct mtd_pairing_scheme { struct module; /* only needed for owner field in mtd_info */ +/** + * struct mtd_debug_info - debugging information for an MTD device. + * + * @dfs_dir: direntry object of the MTD device debugfs directory + */ +struct mtd_debug_info { + struct dentry *dfs_dir; +}; + struct mtd_info { u_char type; uint32_t flags; @@ -346,6 +355,7 @@ struct mtd_info { struct module *owner; struct device dev; int usecount; + struct mtd_debug_info dbg; }; int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, -- cgit From b81b729164445cd94c4dd6fc4d6f10487434df4a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 14:39:30 +0300 Subject: ACPI: Use IS_ERR_OR_NULL() instead of non-NULL check in is_acpi_data_node() The is_acpi_data_node() function takes a struct fwnode_handle pointer as its argument. The validity of the pointer is first checked. Extend the check to cover error values as is done by similar is_acpi_node() and is_acpi_device_node() functions. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 68bc6be447fd..7569123475b3 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -414,7 +414,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_ACPI_DATA; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI_DATA; } static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) -- cgit From db3e50f3234ba1a477413f56a9e5800a73dca786 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 14:39:31 +0300 Subject: device property: Get rid of struct fwnode_handle type field Instead of relying on the struct fwnode_handle type field, define fwnode_operations structs for all separate types of fwnodes. To find out the type, compare to the ops field to relevant ops structs. This change has two benefits: 1. it avoids adding the type field to each and every instance of struct fwnode_handle, thus saving memory and 2. makes the ops field the single factor that defines both the types of the fwnode as well as defines the implementation of its operations, decreasing the possibility of bugs when developing code dealing with fwnode internals. Suggested-by: Rob Herring Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 20 ++++++++++++++++---- include/linux/acpi.h | 8 ++------ include/linux/fwnode.h | 11 ----------- include/linux/irqdomain.h | 4 +++- include/linux/of.h | 3 +-- 5 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 7569123475b3..91b1e58e5189 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -395,15 +395,21 @@ struct acpi_data_node { struct completion kobj_done; }; +extern const struct fwnode_operations acpi_device_fwnode_ops; +extern const struct fwnode_operations acpi_data_fwnode_ops; +extern const struct fwnode_operations acpi_static_fwnode_ops; + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && (fwnode->type == FWNODE_ACPI - || fwnode->type == FWNODE_ACPI_DATA); + return !IS_ERR_OR_NULL(fwnode) && + (fwnode->ops == &acpi_device_fwnode_ops + || fwnode->ops == &acpi_data_fwnode_ops); } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI; + return !IS_ERR_OR_NULL(fwnode) && + fwnode->ops == &acpi_device_fwnode_ops; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) @@ -414,7 +420,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI_DATA; + return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; } static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) @@ -423,6 +429,12 @@ static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwn container_of(fwnode, struct acpi_data_node, fwnode) : NULL; } +static inline bool is_acpi_static_node(struct fwnode_handle *fwnode) +{ + return !IS_ERR_OR_NULL(fwnode) && + fwnode->ops == &acpi_static_fwnode_ops; +} + static inline bool acpi_data_node_match(struct fwnode_handle *fwnode, const char *name) { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..71b763f0bee9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -57,9 +57,6 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) - -extern const struct fwnode_operations acpi_fwnode_ops; - static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; @@ -68,15 +65,14 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) if (!fwnode) return NULL; - fwnode->type = FWNODE_ACPI_STATIC; - fwnode->ops = &acpi_fwnode_ops; + fwnode->ops = &acpi_static_fwnode_ops; return fwnode; } static inline void acpi_free_fwnode_static(struct fwnode_handle *fwnode) { - if (WARN_ON(!fwnode || fwnode->type != FWNODE_ACPI_STATIC)) + if (WARN_ON(!is_acpi_static_node(fwnode))) return; kfree(fwnode); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 50893a1646cf..c5dbc48b55dd 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -14,20 +14,9 @@ #include -enum fwnode_type { - FWNODE_INVALID = 0, - FWNODE_OF, - FWNODE_ACPI, - FWNODE_ACPI_DATA, - FWNODE_ACPI_STATIC, - FWNODE_PDATA, - FWNODE_IRQCHIP -}; - struct fwnode_operations; struct fwnode_handle { - enum fwnode_type type; struct fwnode_handle *secondary; const struct fwnode_operations *ops; }; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index cac77a5c5555..d24273840b79 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -265,9 +265,11 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) return node ? &node->fwnode : NULL; } +extern const struct fwnode_operations irqchip_fwnode_ops; + static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_IRQCHIP; + return fwnode && fwnode->ops == &irqchip_fwnode_ops; } extern void irq_domain_update_bus_token(struct irq_domain *domain, diff --git a/include/linux/of.h b/include/linux/of.h index 4a8a70916237..cfc34117fc92 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -104,7 +104,6 @@ extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { kobject_init(&node->kobj, &of_node_ktype); - node->fwnode.type = FWNODE_OF; node->fwnode.ops = &of_fwnode_ops; } @@ -152,7 +151,7 @@ void of_core_init(void); static inline bool is_of_node(const struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF; + return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &of_fwnode_ops; } #define to_of_node(__fwnode) \ -- cgit From 8b9d6802583a1ef6977e4b059f9fa848e6882253 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 14:39:33 +0300 Subject: ACPI: Constify acpi_bus helper functions, switch to macros Constify arguments to is_acpi_node(), is_acpi_device_node(), is_acpi_static_node() and acpi_data_node_match(). Make to_acpi_device_node() and to_acpi_data_node() macros that can cope with const and non-const arguments. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 91b1e58e5189..89bc2132429d 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -399,43 +399,51 @@ extern const struct fwnode_operations acpi_device_fwnode_ops; extern const struct fwnode_operations acpi_data_fwnode_ops; extern const struct fwnode_operations acpi_static_fwnode_ops; -static inline bool is_acpi_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && (fwnode->ops == &acpi_device_fwnode_ops || fwnode->ops == &acpi_data_fwnode_ops); } -static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_device_fwnode_ops; } -static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) -{ - return is_acpi_device_node(fwnode) ? - container_of(fwnode, struct acpi_device, fwnode) : NULL; -} - -static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) +#define to_acpi_device_node(__fwnode) \ + ({ \ + typeof(__fwnode) __to_acpi_device_node_fwnode = __fwnode; \ + \ + is_acpi_device_node(__to_acpi_device_node_fwnode) ? \ + container_of(__to_acpi_device_node_fwnode, \ + struct acpi_device, fwnode) : \ + NULL; \ + }) + +static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; } -static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) -{ - return is_acpi_data_node(fwnode) ? - container_of(fwnode, struct acpi_data_node, fwnode) : NULL; -} - -static inline bool is_acpi_static_node(struct fwnode_handle *fwnode) +#define to_acpi_data_node(__fwnode) \ + ({ \ + typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \ + \ + is_acpi_data_node(__to_acpi_data_node_fwnode) ? \ + container_of(__to_acpi_data_node_fwnode, \ + struct acpi_data_node, fwnode) : \ + NULL; \ + }) + +static inline bool is_acpi_static_node(const struct fwnode_handle *fwnode) { return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_static_fwnode_ops; } -static inline bool acpi_data_node_match(struct fwnode_handle *fwnode, +static inline bool acpi_data_node_match(const struct fwnode_handle *fwnode, const char *name) { return is_acpi_data_node(fwnode) ? -- cgit From 99a85464693faa9b6829cb753b328c2e4434d94b Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 14:39:34 +0300 Subject: ACPI: Constify internal fwnode arguments Constify internal ACPI fwnode arguments in preparation for the same in fwnode API. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 71b763f0bee9..4d9fb610f114 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1003,13 +1003,14 @@ struct acpi_reference_args { }; #ifdef CONFIG_ACPI -int acpi_dev_get_property(struct acpi_device *adev, const char *name, +int acpi_dev_get_property(const struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int __acpi_node_get_property_reference(struct fwnode_handle *fwnode, +int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct acpi_reference_args *args); -static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, +static inline int acpi_node_get_property_reference( + const struct fwnode_handle *fwnode, const char *name, size_t index, struct acpi_reference_args *args) { @@ -1017,13 +1018,15 @@ static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, MAX_ACPI_REFERENCE_ARGS, args); } -int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, +int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr); -int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, - enum dev_prop_type proptype, void *val); -int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, - enum dev_prop_type proptype, void *val, size_t nval); -int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, +int acpi_dev_prop_read_single(struct acpi_device *adev, + const char *propname, enum dev_prop_type proptype, + void *val); +int acpi_node_prop_read(const struct fwnode_handle *fwnode, + const char *propname, enum dev_prop_type proptype, + void *val, size_t nval); +int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, @@ -1100,35 +1103,36 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, } static inline int -__acpi_node_get_property_reference(struct fwnode_handle *fwnode, +__acpi_node_get_property_reference(const struct fwnode_handle *fwnode, const char *name, size_t index, size_t num_args, struct acpi_reference_args *args) { return -ENXIO; } -static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, size_t index, - struct acpi_reference_args *args) +static inline int +acpi_node_get_property_reference(const struct fwnode_handle *fwnode, + const char *name, size_t index, + struct acpi_reference_args *args) { return -ENXIO; } -static inline int acpi_node_prop_get(struct fwnode_handle *fwnode, +static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname, void **valptr) { return -ENXIO; } -static inline int acpi_dev_prop_get(struct acpi_device *adev, +static inline int acpi_dev_prop_get(const struct acpi_device *adev, const char *propname, void **valptr) { return -ENXIO; } -static inline int acpi_dev_prop_read_single(struct acpi_device *adev, +static inline int acpi_dev_prop_read_single(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val) @@ -1136,7 +1140,7 @@ static inline int acpi_dev_prop_read_single(struct acpi_device *adev, return -ENXIO; } -static inline int acpi_node_prop_read(struct fwnode_handle *fwnode, +static inline int acpi_node_prop_read(const struct fwnode_handle *fwnode, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) @@ -1144,7 +1148,7 @@ static inline int acpi_node_prop_read(struct fwnode_handle *fwnode, return -ENXIO; } -static inline int acpi_dev_prop_read(struct acpi_device *adev, +static inline int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval) -- cgit From 37ba983cfb47cc7b353146422c437468fcb29c61 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 14:39:36 +0300 Subject: device property: Constify fwnode property API Make fwnode arguments to the fwnode property API const. Signed-off-by: Sakari Ailus Reviewed-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 20 ++++++++------- include/linux/fwnode.h | 26 +++++++++++--------- include/linux/property.h | 63 +++++++++++++++++++++++++----------------------- 3 files changed, 58 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4d9fb610f114..8b9edf87b98d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1029,13 +1029,14 @@ int acpi_node_prop_read(const struct fwnode_handle *fwnode, int acpi_dev_prop_read(const struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); -struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, +struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child); -struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode); -struct fwnode_handle *acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, - struct fwnode_handle *prev); -int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, +struct fwnode_handle * +acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, + struct fwnode_handle *prev); +int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint); @@ -1157,26 +1158,27 @@ static inline int acpi_dev_prop_read(const struct acpi_device *adev, } static inline struct fwnode_handle * -acpi_get_next_subnode(struct fwnode_handle *fwnode, struct fwnode_handle *child) +acpi_get_next_subnode(const struct fwnode_handle *fwnode, + struct fwnode_handle *child) { return NULL; } static inline struct fwnode_handle * -acpi_node_get_parent(struct fwnode_handle *fwnode) +acpi_node_get_parent(const struct fwnode_handle *fwnode) { return NULL; } static inline struct fwnode_handle * -acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, +acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return ERR_PTR(-ENXIO); } static inline int -acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, +acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle **remote, struct fwnode_handle **port, struct fwnode_handle **endpoint) diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index c5dbc48b55dd..7b50ee4edcfc 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -55,30 +55,32 @@ struct fwnode_endpoint { struct fwnode_operations { void (*get)(struct fwnode_handle *fwnode); void (*put)(struct fwnode_handle *fwnode); - bool (*device_is_available)(struct fwnode_handle *fwnode); - bool (*property_present)(struct fwnode_handle *fwnode, + bool (*device_is_available)(const struct fwnode_handle *fwnode); + bool (*property_present)(const struct fwnode_handle *fwnode, const char *propname); - int (*property_read_int_array)(struct fwnode_handle *fwnode, + int (*property_read_int_array)(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval); - int (*property_read_string_array)(struct fwnode_handle *fwnode_handle, - const char *propname, - const char **val, size_t nval); - struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode); + int + (*property_read_string_array)(const struct fwnode_handle *fwnode_handle, + const char *propname, const char **val, + size_t nval); + struct fwnode_handle *(*get_parent)(const struct fwnode_handle *fwnode); struct fwnode_handle * - (*get_next_child_node)(struct fwnode_handle *fwnode, + (*get_next_child_node)(const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle * - (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name); + (*get_named_child_node)(const struct fwnode_handle *fwnode, + const char *name); struct fwnode_handle * - (*graph_get_next_endpoint)(struct fwnode_handle *fwnode, + (*graph_get_next_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * - (*graph_get_remote_endpoint)(struct fwnode_handle *fwnode); + (*graph_get_remote_endpoint)(const struct fwnode_handle *fwnode); struct fwnode_handle * (*graph_get_port_parent)(struct fwnode_handle *fwnode); - int (*graph_parse_endpoint)(struct fwnode_handle *fwnode, + int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); }; diff --git a/include/linux/property.h b/include/linux/property.h index 7e77039e6b81..edff3f89e755 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -51,46 +51,48 @@ int device_property_read_string(struct device *dev, const char *propname, int device_property_match_string(struct device *dev, const char *propname, const char *string); -bool fwnode_device_is_available(struct fwnode_handle *fwnode); -bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); -int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, +bool fwnode_device_is_available(const struct fwnode_handle *fwnode); +bool fwnode_property_present(const struct fwnode_handle *fwnode, + const char *propname); +int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval); -int fwnode_property_read_u16_array(struct fwnode_handle *fwnode, +int fwnode_property_read_u16_array(const struct fwnode_handle *fwnode, const char *propname, u16 *val, size_t nval); -int fwnode_property_read_u32_array(struct fwnode_handle *fwnode, +int fwnode_property_read_u32_array(const struct fwnode_handle *fwnode, const char *propname, u32 *val, size_t nval); -int fwnode_property_read_u64_array(struct fwnode_handle *fwnode, +int fwnode_property_read_u64_array(const struct fwnode_handle *fwnode, const char *propname, u64 *val, size_t nval); -int fwnode_property_read_string_array(struct fwnode_handle *fwnode, +int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval); -int fwnode_property_read_string(struct fwnode_handle *fwnode, +int fwnode_property_read_string(const struct fwnode_handle *fwnode, const char *propname, const char **val); -int fwnode_property_match_string(struct fwnode_handle *fwnode, +int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string); -struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode); -struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); -struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode, - struct fwnode_handle *child); +struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_parent( + struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_child_node( + const struct fwnode_handle *fwnode, struct fwnode_handle *child); #define fwnode_for_each_child_node(fwnode, child) \ for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ child = fwnode_get_next_child_node(fwnode, child)) -struct fwnode_handle *device_get_next_child_node(struct device *dev, - struct fwnode_handle *child); +struct fwnode_handle *device_get_next_child_node( + struct device *dev, struct fwnode_handle *child); #define device_for_each_child_node(dev, child) \ for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) -struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode, - const char *childname); +struct fwnode_handle *fwnode_get_named_child_node( + const struct fwnode_handle *fwnode, const char *childname); struct fwnode_handle *device_get_named_child_node(struct device *dev, const char *childname); @@ -129,31 +131,31 @@ static inline int device_property_read_u64(struct device *dev, return device_property_read_u64_array(dev, propname, val, 1); } -static inline bool fwnode_property_read_bool(struct fwnode_handle *fwnode, +static inline bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { return fwnode_property_present(fwnode, propname); } -static inline int fwnode_property_read_u8(struct fwnode_handle *fwnode, +static inline int fwnode_property_read_u8(const struct fwnode_handle *fwnode, const char *propname, u8 *val) { return fwnode_property_read_u8_array(fwnode, propname, val, 1); } -static inline int fwnode_property_read_u16(struct fwnode_handle *fwnode, +static inline int fwnode_property_read_u16(const struct fwnode_handle *fwnode, const char *propname, u16 *val) { return fwnode_property_read_u16_array(fwnode, propname, val, 1); } -static inline int fwnode_property_read_u32(struct fwnode_handle *fwnode, +static inline int fwnode_property_read_u32(const struct fwnode_handle *fwnode, const char *propname, u32 *val) { return fwnode_property_read_u32_array(fwnode, propname, val, 1); } -static inline int fwnode_property_read_u64(struct fwnode_handle *fwnode, +static inline int fwnode_property_read_u64(const struct fwnode_handle *fwnode, const char *propname, u64 *val) { return fwnode_property_read_u64_array(fwnode, propname, val, 1); @@ -274,19 +276,20 @@ int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); struct fwnode_handle *fwnode_graph_get_next_endpoint( - struct fwnode_handle *fwnode, struct fwnode_handle *prev); + const struct fwnode_handle *fwnode, struct fwnode_handle *prev); struct fwnode_handle * -fwnode_graph_get_port_parent(struct fwnode_handle *fwnode); +fwnode_graph_get_port_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port_parent( - struct fwnode_handle *fwnode); + const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port( - struct fwnode_handle *fwnode); + const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_endpoint( - struct fwnode_handle *fwnode); -struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode, - u32 port, u32 endpoint); + const struct fwnode_handle *fwnode); +struct fwnode_handle * +fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, + u32 endpoint); -int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, +int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); #endif /* _LINUX_PROPERTY_H_ */ -- cgit From 3e3119d3088f41106f3581d39e7694a50ca3fc02 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 21 Jul 2017 15:11:49 +0300 Subject: device property: Introduce fwnode_property_get_reference_args The new fwnode_property_get_reference_args() interface amends the fwnode property API with the functionality of both of_parse_phandle_with_args() and __acpi_node_get_property_reference(). The semantics is slightly different: the cells property is ignored on ACPI as the number of arguments can be explicitly obtained from the firmware interface. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 19 +++++++++++++++++++ include/linux/property.h | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 7b50ee4edcfc..0c35b6caf0f6 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -33,6 +33,20 @@ struct fwnode_endpoint { const struct fwnode_handle *local_fwnode; }; +#define NR_FWNODE_REFERENCE_ARGS 8 + +/** + * struct fwnode_reference_args - Fwnode reference with additional arguments + * @fwnode:- A reference to the base fwnode + * @nargs: Number of elements in @args array + * @args: Integer arguments on the fwnode + */ +struct fwnode_reference_args { + struct fwnode_handle *fwnode; + unsigned int nargs; + unsigned int args[NR_FWNODE_REFERENCE_ARGS]; +}; + /** * struct fwnode_operations - Operations for fwnode interface * @get: Get a reference to an fwnode. @@ -46,6 +60,7 @@ struct fwnode_endpoint { * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. + * @get_reference_args: Return a reference pointed to by a property, with args * @graph_get_next_endpoint: Return an endpoint node in an iteration. * @graph_get_remote_endpoint: Return the remote endpoint node of a local * endpoint node. @@ -73,6 +88,10 @@ struct fwnode_operations { struct fwnode_handle * (*get_named_child_node)(const struct fwnode_handle *fwnode, const char *name); + int (*get_reference_args)(const struct fwnode_handle *fwnode, + const char *prop, const char *nargs_prop, + unsigned int nargs, unsigned int index, + struct fwnode_reference_args *args); struct fwnode_handle * (*graph_get_next_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_handle *prev); diff --git a/include/linux/property.h b/include/linux/property.h index edff3f89e755..6bebee13c5e0 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -73,6 +73,10 @@ int fwnode_property_read_string(const struct fwnode_handle *fwnode, const char *propname, const char **val); int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string); +int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, + const char *prop, const char *nargs_prop, + unsigned int nargs, unsigned int index, + struct fwnode_reference_args *args); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( -- cgit From 2d045036322c29b69c22f06530f1130338d06373 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:41 +0530 Subject: cpufreq: governor: Drop min_sampling_rate The cpufreq core and governors aren't supposed to set a limit on how fast we want to try changing the frequency. This is currently done for the legacy governors with help of min_sampling_rate. At worst, we may end up setting the sampling rate to a value lower than the rate at which frequency can be changed and then one of the CPUs in the policy will be only changing frequency for ever. But that is something for the user to decide and there is no need to have special handling for such cases in the core. Leave it for the user to figure out. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f10a9b3761cd..02aec384cab9 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -491,9 +491,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) * the ondemand governor will not work. All times here are in us (microseconds). */ -#define MIN_SAMPLING_RATE_RATIO (2) #define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (20) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) struct cpufreq_governor { -- cgit From aa7519af450d3c62a057aece24877c34562fa25a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:42 +0530 Subject: cpufreq: Use transition_delay_us for legacy governors as well The policy->transition_delay_us field is used only by the schedutil governor currently, and this field describes how fast the driver wants the cpufreq governor to change CPUs frequency. It should rather be a common thing across all governors, as it doesn't have any schedutil dependency here. Create a new helper cpufreq_policy_transition_delay_us() to get the transition delay across all governors. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 02aec384cab9..aaadfc543f63 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -523,6 +523,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq); +unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -- cgit From bd8c9ba3b1e2037af5af4e48aea1087212838179 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 17 Jul 2017 17:19:25 -0700 Subject: PM / suspend: Export pm_suspend_target_state Have the core suspend/resume framework store the system-wide suspend state (suspend_state_t) we are about to enter, and expose it to drivers via pm_suspend_target_state in order to retrieve that. The state is assigned in suspend_devices_and_enter(). This is useful for platform specific drivers that may need to take a slightly different suspend/resume path based on the system's suspend/resume state being entered. Signed-off-by: Florian Fainelli Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0b1cf32edfd7..2159f6841768 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -427,6 +427,7 @@ extern int unregister_pm_notifier(struct notifier_block *nb); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; extern unsigned int pm_wakeup_irq; +extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -- cgit From 8d8b2441db9647890251538f60b75a4e45fdef8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Jul 2017 02:38:44 +0200 Subject: PM / sleep: Do not print debug messages by default Debug messages from the system suspend/hibernation infrastructure can fill up the entire kernel log buffer in some cases and anyway they are only useful for debugging. They depend on CONFIG_PM_DEBUG, but that is set as a rule as some generally useful diagnostic facilities depend on it too. For this reason, avoid printing those messages by default, but make it possible to turn them on as needed with the help of a new sysfs attribute under /sys/power/. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 2159f6841768..707e4cdf21c2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -492,8 +492,14 @@ static inline void unlock_system_sleep(void) {} #ifdef CONFIG_PM_SLEEP_DEBUG extern bool pm_print_times_enabled; +extern __printf(1, 2) void pm_pr_dbg(const char *fmt, ...); #else #define pm_print_times_enabled (false) + +#include + +#define pm_pr_dbg(fmt, ...) \ + no_printk(KERN_DEBUG fmt, ##__VA_ARGS__) #endif #ifdef CONFIG_PM_AUTOSLEEP -- cgit From 1455cf8dbfd06aa7651dcfccbadb7a093944ca65 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Jul 2017 17:24:30 -0700 Subject: driver core: emit uevents when device is bound to a driver There are certain touch controllers that may come up in either normal (application) or boot mode, depending on whether firmware/configuration is corrupted when they are powered on. In boot mode the kernel does not create input device instance (because it does not necessarily know the characteristics of the input device in question). Another number of controllers does not store firmware in a non-volatile memory, and they similarly need to have firmware loaded before input device instance is created. There are also other types of devices with similar behavior. There is a desire to be able to trigger firmware loading via udev, but it has to happen only when driver is bound to a physical device (i2c or spi). These udev actions can not use ADD events, as those happen too early, so we are introducing BIND and UNBIND events that are emitted at the right moment. Also, many drivers create additional driver-specific device attributes when binding to the device, to provide userspace with additional controls. The new events allow userspace to adjust these driver-specific attributes without worrying that they are not there yet. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ca85cb80e99a..12f5ddccb97c 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -57,6 +57,8 @@ enum kobject_action { KOBJ_MOVE, KOBJ_ONLINE, KOBJ_OFFLINE, + KOBJ_BIND, + KOBJ_UNBIND, KOBJ_MAX }; -- cgit From a7670d425b75f9e44b7d4d0aea04f4a6d5f34291 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Jul 2017 17:24:31 -0700 Subject: driver core: make device_{add|remove}_groups() public Many drivers create additional driver-specific device attributes when binding to the device. To avoid them calling SYSFS API directly, let's export these helpers. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 9ef518af5515..10cf209a4e82 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1200,6 +1200,11 @@ struct device *device_create_with_groups(struct class *cls, const char *fmt, ...); extern void device_destroy(struct class *cls, dev_t devt); +extern int __must_check device_add_groups(struct device *dev, + const struct attribute_group **groups); +extern void device_remove_groups(struct device *dev, + const struct attribute_group **groups); + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't -- cgit From e323b2dddc1ce7ea7f032c986c012a04d5977dc8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Jul 2017 17:24:32 -0700 Subject: driver core: add device_{add|remove}_group() helpers We have helpers that work with NULL terminated array of groups, but many drivers only create a single supplemental group, and do not want to declare a group array. Let's provide them with helpers working with a single group. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 10cf209a4e82..7698a513b35e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1205,6 +1205,22 @@ extern int __must_check device_add_groups(struct device *dev, extern void device_remove_groups(struct device *dev, const struct attribute_group **groups); +static inline int __must_check device_add_group(struct device *dev, + const struct attribute_group *grp) +{ + const struct attribute_group *groups[] = { grp, NULL }; + + return device_add_groups(dev, groups); +} + +static inline void device_remove_group(struct device *dev, + const struct attribute_group *grp) +{ + const struct attribute_group *groups[] = { grp, NULL }; + + return device_remove_groups(dev, groups); +} + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't -- cgit From 57b8ff070f9897b22e4f80fda775a489fc797008 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 19 Jul 2017 17:24:33 -0700 Subject: driver core: add devm_device_add_group() and friends Many drivers create additional driver-specific device attributes when binding to the device, and providing managed version of device_create_group() will simplify unbinding and error handling in probe path for such drivers. Without managed version driver writers either have to mix manual and managed resources, which is prone to errors, or open-code this function by providing a wrapper to device_add_group() and use it with devm_add_action() or devm_add_action_or_reset(). Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 7698a513b35e..f52288c24734 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1221,6 +1221,15 @@ static inline void device_remove_group(struct device *dev, return device_remove_groups(dev, groups); } +extern int __must_check devm_device_add_groups(struct device *dev, + const struct attribute_group **groups); +extern void devm_device_remove_groups(struct device *dev, + const struct attribute_group **groups); +extern int __must_check devm_device_add_group(struct device *dev, + const struct attribute_group *grp); +extern void devm_device_remove_group(struct device *dev, + const struct attribute_group *grp); + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't -- cgit From cb08e0353c249a27aed10c6f60a13871ae449d33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 23 Jul 2017 00:03:43 +0200 Subject: PM / timekeeping: Print debug messages when requested The messages printed by tk_debug_account_sleep_time() are basically useful for system sleep debugging, so print them only when the other debug messages from the core suspend/hibernate code are enabled. While at it, make it clear that the messages from tk_debug_account_sleep_time() are about timekeeping suspend duration, because in general timekeeping may be suspeded and resumed for multiple times during one system suspend-resume cycle. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 707e4cdf21c2..97e394feabdb 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -492,16 +492,22 @@ static inline void unlock_system_sleep(void) {} #ifdef CONFIG_PM_SLEEP_DEBUG extern bool pm_print_times_enabled; -extern __printf(1, 2) void pm_pr_dbg(const char *fmt, ...); +extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...); #else #define pm_print_times_enabled (false) #include -#define pm_pr_dbg(fmt, ...) \ +#define __pm_pr_dbg(defer, fmt, ...) \ no_printk(KERN_DEBUG fmt, ##__VA_ARGS__) #endif +#define pm_pr_dbg(fmt, ...) \ + __pm_pr_dbg(false, fmt, ##__VA_ARGS__) + +#define pm_deferred_pr_dbg(fmt, ...) \ + __pm_pr_dbg(true, fmt, ##__VA_ARGS__) + #ifdef CONFIG_PM_AUTOSLEEP /* kernel/power/autosleep.c */ -- cgit From c2327da06b33d8e1093ce2c28f395bc500d1b0d3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Jun 2017 20:09:32 +0300 Subject: x86/io: Remove mem*io() duplications Generic header defines memset_io, memcpy_fromio(). and memcpy_toio(). Reuse them from generic header and remove in x86 code. Move the descriptions to the generic header as well. Signed-off-by: Andy Shevchenko Cc: Baolin Wang Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Cc: linux-i2c@vger.kernel.org Cc: wsa@the-dreams.de Link: http://lkml.kernel.org/r/20170630170934.83028-4-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- include/asm-generic/io.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 7ef015eb3403..395afc829409 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -954,6 +954,14 @@ static inline void *bus_to_virt(unsigned long address) #ifndef memset_io #define memset_io memset_io +/** + * memset_io Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ static inline void memset_io(volatile void __iomem *addr, int value, size_t size) { @@ -963,6 +971,14 @@ static inline void memset_io(volatile void __iomem *addr, int value, #ifndef memcpy_fromio #define memcpy_fromio memcpy_fromio +/** + * memcpy_fromio Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ static inline void memcpy_fromio(void *buffer, const volatile void __iomem *addr, size_t size) @@ -973,6 +989,14 @@ static inline void memcpy_fromio(void *buffer, #ifndef memcpy_toio #define memcpy_toio memcpy_toio +/** + * memcpy_toio Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. + */ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer, size_t size) { -- cgit From eabc2a7c49c01fc97ff8c764ef7d74276b904af6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 30 Jun 2017 20:09:33 +0300 Subject: x86/io: Remove xlate_dev_kmem_ptr() duplication Generic header defines xlate_dev_kmem_ptr(). Reuse it from generic header and remove in x86 code. Move a description to the generic header as well. Signed-off-by: Andy Shevchenko Cc: Baolin Wang Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: intel-gfx@lists.freedesktop.org Cc: linux-i2c@vger.kernel.org Cc: wsa@the-dreams.de Link: http://lkml.kernel.org/r/20170630170934.83028-5-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- include/asm-generic/io.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 395afc829409..b4531e3b2120 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -915,6 +915,9 @@ extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_GENERIC_IOMAP */ #endif /* CONFIG_HAS_IOPORT_MAP */ +/* + * Convert a virtual cached pointer to an uncached pointer + */ #ifndef xlate_dev_kmem_ptr #define xlate_dev_kmem_ptr xlate_dev_kmem_ptr static inline void *xlate_dev_kmem_ptr(void *addr) -- cgit From 9f08ea848117ab521efcfd3e004d8e1a0edc640c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Jul 2017 20:18:09 +0200 Subject: netfilter: nf_tables: keep chain counters away from hot path These chain counters are only used by the iptables-compat tool, that allow users to use the x_tables extensions from the existing nf_tables framework. This patch makes nf_tables by ~5% for the general usecase, ie. native nft users, where no chain counters are used at all. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8f690effec37..424684c33771 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -49,6 +49,8 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; + +extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; #endif /* _NET_NF_TABLES_CORE_H */ -- cgit From 784b4e612d42a2b7578d7fab2ed78940e10536bc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 19 Jul 2017 16:32:23 +0200 Subject: netfilter: nf_tables: Attach process info to NFT_MSG_NEWGEN notifications This is helpful for 'nft monitor' to track which process caused a given change to the ruleset. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 683f6f88fcac..6f0a950e21c3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1221,6 +1221,8 @@ enum nft_objref_attributes { enum nft_gen_attributes { NFTA_GEN_UNSPEC, NFTA_GEN_ID, + NFTA_GEN_PROC_PID, + NFTA_GEN_PROC_NAME, __NFTA_GEN_MAX }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) -- cgit From d41861942fc55c14b6280d9568a0d0112037f065 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 14 Jun 2017 23:13:34 +0300 Subject: IB/core: Add generic function to extract IB speed from netdev Logic of retrieving netdev speed from net_device and translating it to IB speed is implemented in rxe, in usnic and in bnxt drivers. Define new function which merges all. Signed-off-by: Yuval Shaia Reviewed-by: Christian Benvenuti Reviewed-by: Selvin Xavier Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..68d947dac9a2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3555,6 +3555,7 @@ void ib_drain_qp(struct ib_qp *qp); int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { -- cgit From bded747bb432bc5f7ad6d84ea747368b70ed9df2 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 30 May 2017 09:42:53 +0300 Subject: net/mlx5: Add raw ethernet local loopback firmware command Add support for raw ethernet local loopback firmware command. Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- include/linux/mlx5/vport.h | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 87869c04849a..57c75e8b3c19 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1016,7 +1016,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 reserved_at_3e1[0xa]; + u8 disable_local_lb[0x1]; + u8 reserved_at_3e2[0x9]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; @@ -2562,7 +2563,9 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_nic_vport_context_bits { u8 reserved_at_0[0x5]; u8 min_wqe_inline_mode[0x3]; - u8 reserved_at_8[0x17]; + u8 reserved_at_8[0x15]; + u8 disable_mc_local_lb[0x1]; + u8 disable_uc_local_lb[0x1]; u8 roce_en[0x1]; u8 arm_change_event[0x1]; @@ -5229,7 +5232,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x16]; + u8 reserved_at_0[0x14]; + u8 disable_uc_local_lb[0x1]; + u8 disable_mc_local_lb[0x1]; u8 node_guid[0x1]; u8 port_guid[0x1]; u8 min_inline[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 656c70b65dd2..aaa0bb9e7655 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -114,5 +114,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, int vf, struct mlx5_hca_vport_context *req); - +int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable); +int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); #endif /* __MLX5_VPORT_H__ */ -- cgit From 4a2da0b8c0782816f3ae6846ae7942fcbb0f8172 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 May 2017 10:05:15 +0300 Subject: IB/mlx5: Add debug control parameters for congestion control This patch adds debug control parameters for congestion control which can be read or written through debugfs. They are for reaction point and notification point nodes. These control parameters are as below: +------------------------------+-----------------------------------------+ | Name | Description | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate | When set target rate is updated to | | | current rate | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate_ati | When set update target rate based on | | | timer as well | |------------------------------+-----------------------------------------| |rp_time_reset | time between rate increase if no | | | CNP is received unit in usec | |------------------------------+-----------------------------------------| |rp_byte_reset | Number of bytes between rate inease if | | | no CNP is received | |------------------------------+-----------------------------------------| |rp_threshold | Threshold for reaction point rate | | | control | |------------------------------+-----------------------------------------| |rp_ai_rate | Rate for target rate, unit in Mbps | |------------------------------+-----------------------------------------| |rp_hai_rate | Rate for hyper increase state | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_min_dec_fac | Minimum factor by which the current | | | transmit rate can be changed when | | | processing a CNP, unit is percerntage | |------------------------------+-----------------------------------------| |rp_min_rate | Minimum value for rate limit, | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_rate_to_set_on_first_cnp | Rate that is set when first CNP is | | | received, unit is Mbps | |------------------------------+-----------------------------------------| |rp_dce_tcp_g | Used to calculate alpha | |------------------------------+-----------------------------------------| |rp_dce_tcp_rtt | Time between updates of alpha value, | | | unit is usec | |------------------------------+-----------------------------------------| |rp_rate_reduce_monitor_period | Minimum time between consecutive rate | | | reductions | |------------------------------+-----------------------------------------| |rp_initial_alpha_value | Initial value of alpha | |------------------------------+-----------------------------------------| |rp_gd | When CNP is received, flow rate is | | | reduced based on gd, rp_gd is given as | | | log2(rp_gd) | |------------------------------+-----------------------------------------| |np_cnp_dscp | dscp code point for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio_mode | 802.1p priority for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio | cnp priority mode | +------------------------------+-----------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57c75e8b3c19..3e697f672de5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1188,7 +1188,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_at_c0[0x12]; u8 cnp_dscp[0x6]; - u8 reserved_at_d8[0x5]; + u8 reserved_at_d8[0x4]; + u8 cnp_prio_mode[0x1]; u8 cnp_802p_prio[0x3]; u8 reserved_at_e0[0x720]; -- cgit From 7ecf6d8ff154e6f7471ee537a400d43a5f3b1c57 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 30 May 2017 10:18:24 +0300 Subject: IB/mlx5: Restore IB guid/policy for virtual functions When a user sets port_guid, node_guid or policy of an IB virtual function, save this information in "struct mlx5_vf_context". This information will be restored later when pci_resume is called. To make sure this works, one can use aer-inject to generate PCI errors on mlx5 devices and verify if relevant fields are restored after PCI resume. Signed-off-by: Bodong Wang Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..54221be5f69e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -162,6 +162,13 @@ enum dbg_rsc_type { MLX5_DBG_RSC_CQ, }; +enum port_state_policy { + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff +}; + struct mlx5_field_desc { struct dentry *dent; int i; @@ -525,6 +532,9 @@ struct mlx5_mkey_table { struct mlx5_vf_context { int enabled; + u64 port_guid; + u64 node_guid; + enum port_state_policy policy; }; struct mlx5_core_sriov { @@ -842,13 +852,6 @@ struct mlx5_pas { u8 log_sz; }; -enum port_state_policy { - MLX5_POLICY_DOWN = 0, - MLX5_POLICY_UP = 1, - MLX5_POLICY_FOLLOW = 2, - MLX5_POLICY_INVALID = 0xffffffff -}; - enum phy_port_state { MLX5_AAA_111 }; -- cgit From 7d9336d80b0b35d3537d37ff35e08dcb425073ed Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:10 +0300 Subject: IB/core: Introduce delay drop for a WQ Work queue which is created with IB_WQ_FLAGS_DELAY_DROP won't cause packet drops when there aren't receive WQEs, but will wait until posting of receive WQEs or for some period of time that the device was configured with. It includes: * Add a new creation flag to enable delay drop functionality in a WQ. * A new capability was introduced - IB_RAW_PACKET_CAP_DELAY_DROP, which is the device's ability to delay packet drops when there aren't receive WQEs. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..4a1444456af7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1546,6 +1546,10 @@ enum ib_raw_packet_caps { IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1), /* Checksum offloads are supported (for both send and receive). */ IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2), + /* When a packet is received for an RQ with no receive WQEs, the + * packet processing is delayed. + */ + IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3), }; enum ib_wq_type { @@ -1574,6 +1578,7 @@ struct ib_wq { enum ib_wq_flags { IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0, IB_WQ_FLAGS_SCATTER_FCS = 1 << 1, + IB_WQ_FLAGS_DELAY_DROP = 1 << 2, }; struct ib_wq_init_attr { -- cgit From c1e0bfc1312d0e06bdb24e6e4e7e10b0b4313ec6 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:11 +0300 Subject: net/mlx5: Introduce set delay drop command Add support to SET_DELAY_DROP command. This command will be used in downstream patches for delay packet drop. The timeout value should be indicated by delay_drop_timeout field. Packet processing will be delayed till timeout value passed or until more WQEs are posted. Setting this value to 0 disables the feature. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 25 ++++++++++++++++++++++++- include/linux/mlx5/qp.h | 3 +++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3e697f672de5..40c05d29d5bb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -200,6 +200,7 @@ enum { MLX5_CMD_OP_QUERY_SQ = 0x907, MLX5_CMD_OP_CREATE_RQ = 0x908, MLX5_CMD_OP_MODIFY_RQ = 0x909, + MLX5_CMD_OP_SET_DELAY_DROP_PARAMS = 0x910, MLX5_CMD_OP_DESTROY_RQ = 0x90a, MLX5_CMD_OP_QUERY_RQ = 0x90b, MLX5_CMD_OP_CREATE_RMP = 0x90c, @@ -840,7 +841,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 retransmission_q_counters[0x1]; u8 reserved_at_183[0x1]; u8 modify_rq_counter_set_id[0x1]; - u8 reserved_at_185[0x1]; + u8 rq_delay_drop[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -5853,6 +5854,28 @@ struct mlx5_ifc_destroy_rq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_set_delay_drop_params_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x10]; + u8 delay_drop_timeout[0x10]; +}; + +struct mlx5_ifc_set_delay_drop_params_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_destroy_rmp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6f41270d80c0..fff4ec13f620 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -561,6 +561,9 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *out, int outlen); +int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, + u32 timeout_usec); + int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); void mlx5_init_qp_table(struct mlx5_core_dev *dev); -- cgit From 246ac9814c5b2c0e9916dca5fbf8d6a40245fad1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:12 +0300 Subject: net/mlx5: Introduce general notification event When delay drop timeout is expired, the firmware raises general notification event of DELAY_DROP_TIMEOUT subtype. In addition the feature is disable so the driver have to reactivate the timeout. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 5 +++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f31a0b5377e1..a47b9ab9f2c9 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,6 +290,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + MLX5_EVENT_TYPE_GENERAL_EVENT = 0x22, MLX5_EVENT_TYPE_PPS_EVENT = 0x25, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, @@ -304,6 +305,10 @@ enum mlx5_event { MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, }; +enum { + MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, +}; + enum { MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 54221be5f69e..758ef40f9316 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -192,6 +192,7 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_GUID_CHANGE, MLX5_DEV_EVENT_CLIENT_REREG, MLX5_DEV_EVENT_PPS, + MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, }; enum mlx5_port_status { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 40c05d29d5bb..4bc57647fad8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -874,7 +874,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_tc[0x4]; u8 reserved_at_1d0[0x1]; u8 dcbx[0x1]; - u8 reserved_at_1d2[0x3]; + u8 general_notification_event[0x1]; + u8 reserved_at_1d3[0x2]; u8 fpga[0x1]; u8 rol_s[0x1]; u8 rol_g[0x1]; -- cgit From 03404e8ae652e02a5e3388224836cef53d7a0988 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:13 +0300 Subject: IB/mlx5: Add support to dropless RQ RQs that were configured for "delay drop" will prevent packet drops when their WQEs are depleted. Marking an RQ to be drop-less is done by setting delay_drop_en in RQ context using CREATE_RQ command. Since this feature is globally activated/deactivated by using the SET_DELAY_DROP command on all the marked RQs, we activated/deactivated it according to the number of RQs with 'delay_drop' enabled. When timeout is expired, then the feature is deactivated. Therefore the driver handles the delay drop timeout event and reactivate it. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4bc57647fad8..f350688a12fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2519,7 +2519,7 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x1]; + u8 delay_drop_en[0x1]; u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; -- cgit From be1d325a335840a86c133a56c6a911c368bac0fd Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Jun 2017 11:14:03 +0300 Subject: IB/core: Set RoCEv2 MGID according to spec RoCEv2 Annex states that for RoCEv2 over IPv4, the corresponding IPv4 address is encoded into the GID according to the following rule: GID= :ffff: Remove the 0xff0e prefix for RoCEv2 packets with IPv4 and leave it zeroed and change rdma_is_multicast_addr() to consider the new logic. Signed-off-by: Noa Osherovich Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b73a14edc85e..7aca12188ef3 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -304,7 +304,13 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) static inline int rdma_is_multicast_addr(struct in6_addr *addr) { - return addr->s6_addr[0] == 0xff; + u32 ipv4_addr; + + if (addr->s6_addr[0] == 0xff) + return 1; + + memcpy(&ipv4_addr, addr->s6_addr + 12, 4); + return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) -- cgit From 02984cc7b3d62418bd72abacaf875c3a9eccdb66 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:06 +0300 Subject: IB/core: Enable QP creation with a given source QP number Enable QP creation with a given source QP number. The created QP will use the source QPN as its wire QP number. This comes as a pre-patch for downstream patches in this series to allow user space applications to accelerate traffic which is typically handled by IPoIB ULP. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4a1444456af7..76a74c783c50 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1059,6 +1059,7 @@ enum ib_qp_create_flags { /* FREE = 1 << 7, */ IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, + IB_QP_CREATE_SOURCE_QPN = 1 << 10, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -1086,6 +1087,7 @@ struct ib_qp_init_attr { */ u8 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; + u32 source_qpn; }; struct ib_qp_open_attr { -- cgit From 2dee0e545894c23b1a2cc2019ac87dffb42e5984 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:07 +0300 Subject: IB/uverbs: Enable QP creation with a given source QP number Enable QP creation with a given source QP number, the created QP will use the source QPN as its wire QP number. To create such a QP, root privileges (i.e. CAP_NET_RAW) are required from the user application. This comes as a pre-patch for downstream patches in this series to allow user space applications to accelerate traffic which is typically handled by IPoIB ULP. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 270c350bedc6..63656d2e8705 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -578,7 +578,7 @@ struct ib_uverbs_ex_create_qp { __u32 comp_mask; __u32 create_flags; __u32 rwq_ind_tbl_handle; - __u32 reserved1; + __u32 source_qpn; }; struct ib_uverbs_open_qp { -- cgit From 4ce749bd94f697772ac2be4fb7e7a92726e94bfb Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:10 +0300 Subject: net/mlx5: Report enhanced capabilities for IPoIB Report 'ipoib_enhanced_offloads' capabilities from the core layer, it will be used in the next patch from this series. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a47b9ab9f2c9..c13d71deaeca 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -973,7 +973,7 @@ enum mlx5_cap_type { MLX5_CAP_ATOMIC, MLX5_CAP_ROCE, MLX5_CAP_IPOIB_OFFLOADS, - MLX5_CAP_EOIB_OFFLOADS, + MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, MLX5_CAP_ESWITCH, @@ -1016,6 +1016,10 @@ enum mlx5_mcam_feature_groups { MLX5_GET(per_protocol_networking_offload_caps,\ mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) +#define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \ + MLX5_GET(per_protocol_networking_offload_caps,\ + mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap) + #define MLX5_CAP_ROCE(mdev, cap) \ MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap) -- cgit From 3fffc82ad6c78fcc9d5d4eca089f00db14ab0358 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 10:36:16 +0300 Subject: IB/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes already declared define. Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fff4ec13f620..66d19b611fe4 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit From 58dcb60a226ee48cc66c96c27b751f06ec2bc5a9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 19 Jun 2017 07:19:37 +0300 Subject: IB/mlx5: Expose extended error counters This patch adds below requester and responder side error counters, which will be exposed by hardware counters interface and are supported as part of query Q counters command extension. +---------------------------+-------------------------------------+ | Name | Description | |---------------------------+-------------------------------------| |resp_local_length_error | Number of times responder detected | | | local length errors | |---------------------------+-------------------------------------| |resp_cqe_error | Number of CQEs completed with error | | | at responder | |---------------------------+-------------------------------------| |req_cqe_error | Number of CQEs completed with error | | | at requester | |---------------------------+-------------------------------------| |req_remote_invalid_request | Number of times requester detected | | | remote invalid request error | |---------------------------+-------------------------------------| |req_remote_access_error | Number of times requester detected | | | remote access error | |---------------------------+-------------------------------------| |resp_remote_access_error | Number of times responder detected | | | remote access error | |---------------------------+-------------------------------------| |resp_cqe_flush_error | Number of CQEs completed with | | | flushed with error at responder | |---------------------------+-------------------------------------| |req_cqe_flush_error | Number of CQEs completed with | | | flushed with error at requester | +---------------------------+-------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 44 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f350688a12fa..5bae70eb25af 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; - u8 reserved_at_1b1[0x1]; + u8 enhanced_error_q_counters[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b3[0x1]; u8 disable_link_up[0x1]; @@ -3953,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0x4e0]; + u8 reserved_at_320[0xa0]; + + u8 resp_local_length_error[0x20]; + + u8 req_local_length_error[0x20]; + + u8 resp_local_qp_error[0x20]; + + u8 local_operation_error[0x20]; + + u8 resp_local_protection[0x20]; + + u8 req_local_protection[0x20]; + + u8 resp_cqe_error[0x20]; + + u8 req_cqe_error[0x20]; + + u8 req_mw_binding[0x20]; + + u8 req_bad_response[0x20]; + + u8 req_remote_invalid_request[0x20]; + + u8 resp_remote_invalid_request[0x20]; + + u8 req_remote_access_errors[0x20]; + + u8 resp_remote_access_errors[0x20]; + + u8 req_remote_operation_errors[0x20]; + + u8 req_transport_retries_exceeded[0x20]; + + u8 cq_overflow[0x20]; + + u8 resp_cqe_flush_error[0x20]; + + u8 req_cqe_flush_error[0x20]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_query_q_counter_in_bits { -- cgit From ea30b966f7dd6bcfb20c98a7f99608c7bb10bfac Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 21 Jun 2017 09:26:28 +0300 Subject: IB/mlx4: Add inline-receive support When inline-receive is enabled, the HCA may write received data into the receive WQE. Inline-receive is enabled by setting its matching bit in the QP context and each single-packet message with payload not exceeding the receive WQE size will be delivered to the WQE. The completion report will indicate that the payload was placed to the WQE. It includes: 1) Return maximum supported size of inline-receive by the hardware in query_device vendor's data part. 2) Enable the feature when requested by the vendor data input. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index af431752655c..bf3bdba2f326 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -101,7 +101,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u8 reserved[5]; + __u32 inl_recv_sz; + __u8 reserved; }; #endif /* MLX4_ABI_USER_H */ -- cgit From f3301870161ca293cd14b20a802c5646da02407f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Jun 2017 09:29:36 +0300 Subject: (IB, net)/mlx4: Add resource utilization support Adding visibility of resource usage of QPs, CQs and counters used by virtual functions. This feature will be used to give the PF administrator more data while debugging VF status. Usage info was added to ALLOC_RES command, to notify the PF if the resource which is being reserved or allocated for the VF will be used by kernel driver or by user verbs. Updated reservation and allocation functions of QP, CQ and counter with additional usage parameter. Signed-off-by: Moshe Shemesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..3607da001ad3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -428,6 +428,12 @@ enum mlx4_steer_type { MLX4_NUM_STEERS }; +enum mlx4_resource_usage { + MLX4_RES_USAGE_NONE, + MLX4_RES_USAGE_DRIVER, + MLX4_RES_USAGE_USER_VERBS, +}; + enum { MLX4_NUM_FEXCH = 64 * 1024, }; @@ -748,6 +754,7 @@ struct mlx4_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + u8 usage; }; struct mlx4_qp { @@ -757,6 +764,7 @@ struct mlx4_qp { atomic_t refcount; struct completion free; + u8 usage; }; struct mlx4_srq { @@ -1120,7 +1128,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags); + int *base, u8 flags, u8 usage); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); @@ -1417,7 +1425,7 @@ int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); -- cgit From 400b1ebcfe31279895f1baa8ecaa390d9a4a0eef Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:24 +0300 Subject: IB/mlx4: Add support for WQ related verbs Support create/modify/destroy WQ related verbs. The base IB object to enable RSS functionality is a WQ (i.e. ib_wq). This patch implements the related WQ verbs as of create, modify and destroy. In downstream patches the WQ will be used as part of an indirection table (i.e. ib_rwq_ind_table) to enable RSS QP creation. Notes: ConnectX-3 hardware requires consecutive WQNs list as receive descriptor queues for the RSS QP. Hence, the driver manages consecutive ranges lists per context which the user must respect. Destroying the WQ does not return its WQN back to its range for reusing. However, destroying all WQs from the same range releases the range and in turn releases its WQNs for reusing. Since the WQ object is not a natural object in the hardware, the driver implements the WQ by the hardware QP. As such, the WQ inherits its port from its RSS QP parent upon its RST->INIT transition and by that time its state is applied to the hardware. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index bf3bdba2f326..c9702a5f0bda 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -105,4 +105,18 @@ struct mlx4_ib_create_qp { __u8 reserved; }; +struct mlx4_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u8 log_range_size; + __u8 reserved[3]; + __u32 comp_mask; + __u32 reserved1; +}; + +struct mlx4_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + #endif /* MLX4_ABI_USER_H */ -- cgit From b8d46ca035060e70f5f0da849d86720752d5aa17 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:25 +0300 Subject: IB/mlx4: Add support for WQ indirection table related verbs To enable RSS functionality the IB indirection table object (i.e. ib_rwq_ind_table) should be used. This patch implements the related verbs as of create and destroy an indirection table. In downstream patches the indirection table will be used as part of RSS QP creation. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index c9702a5f0bda..5591d955ba00 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -119,4 +119,8 @@ struct mlx4_ib_modify_wq { __u32 reserved; }; +struct mlx4_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; #endif /* MLX4_ABI_USER_H */ -- cgit From 3078f5f1bd8b6c8aef77b8ef4d49671fa6eb058e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:26 +0300 Subject: IB/mlx4: Add support for RSS QP Add support to work with a RSS QP by using an indirection table object upon QP creation. Other related QP verbs (e.g. modify/destroy/query) were updated as well for that QP mode. Notes: - The RX hash properties are supplied as driver private data. - The RSS QP port is used on the associated WQs in its indirection table. Applying different ports during WQ life time is not allowed. - The expected RSS QP flow is: create, modify(RST->INIT), modify(RST->RTR), destroy. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 5591d955ba00..d915cab37ec3 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -95,6 +95,16 @@ struct mlx4_ib_create_srq_resp { __u32 reserved; }; +struct mlx4_ib_create_qp_rss { + __u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 rx_key_len; + __u8 reserved[6]; + __u8 rx_hash_key[40]; + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; @@ -123,4 +133,27 @@ struct mlx4_ib_create_rwq_ind_tbl_resp { __u32 response_length; __u32 reserved; }; + +/* RX Hash function flags */ +enum mlx4_ib_rx_hash_function_flags { + MLX4_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + */ +enum mlx4_ib_rx_hash_fields { + MLX4_IB_RX_HASH_SRC_IPV4 = 1 << 0, + MLX4_IB_RX_HASH_DST_IPV4 = 1 << 1, + MLX4_IB_RX_HASH_SRC_IPV6 = 1 << 2, + MLX4_IB_RX_HASH_DST_IPV6 = 1 << 3, + MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + #endif /* MLX4_ABI_USER_H */ -- cgit From d08477aa975e97f1dc64c0ae59cebf98520456ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Jun 2017 09:28:50 -0500 Subject: fcntl: Don't use ambiguous SIG_POLL si_codes We have a weird and problematic intersection of features that when they all come together result in ambiguous siginfo values, that we can not support properly. - Supporting fcntl(F_SETSIG,...) with arbitrary valid signals. - Using positive values for POLL_IN, POLL_OUT, POLL_MSG, ..., etc that imply they are signal specific si_codes and using the aforementioned arbitrary signal to deliver them. - Supporting injection of arbitrary siginfo values for debugging and checkpoint/restore. The result is that just looking at siginfo si_codes of 1 to 6 are ambigious. It could either be a signal specific si_code or it could be a generic si_code. For most of the kernel this is a non-issue but for sending signals with siginfo it is impossible to play back the kernel signals and get the same result. Strictly speaking when the si_code was changed from SI_SIGIO to POLL_IN and friends between 2.2 and 2.4 this functionality was not ambiguous, as only real time signals were supported. Before 2.4 was released the kernel began supporting siginfo with non realtime signals so they could give details of why the signal was sent. The result is that if F_SETSIG is set to one of the signals with signal specific si_codes then user space can not know why the signal was sent. I grepped through a bunch of userspace programs using debian code search to get a feel for how often people choose a signal that results in an ambiguous si_code. I only found one program doing so and it was using SIGCHLD to test the F_SETSIG functionality, and did not appear to be a real world usage. Therefore the ambiguity does not appears to be a real world problem in practice. Remove the ambiguity while introducing the smallest chance of breakage by changing the si_code to SI_SIGIO when signals with signal specific si_codes are targeted. Fixes: v2.3.40 -- Added support for queueing non-rt signals Fixes: v2.3.21 -- Changed the si_code from SI_SIGIO Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 8 ++++++++ include/uapi/asm-generic/siginfo.h | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index e2678b5dbb21..c97cc20369c0 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -380,10 +380,18 @@ int unhandled_signal(struct task_struct *tsk, int sig); rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) +#define SIG_SPECIFIC_SICODES_MASK (\ + rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ + rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ + rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ + rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ + SIGEMT_MASK ) + #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) +#define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9c4eca6b374a..9e956ea94d57 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -184,7 +184,7 @@ typedef struct siginfo { #define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ #define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO -5 /* sent by queued SIGIO */ +#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -259,7 +259,7 @@ typedef struct siginfo { #define NSIGCHLD 6 /* - * SIGPOLL si_codes + * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ #define POLL_IN (__SI_POLL|1) /* data input available */ #define POLL_OUT (__SI_POLL|2) /* output buffers available */ -- cgit From cc731525f26af85a1c3537da41e0abd1d35e0bdb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 22:36:59 -0500 Subject: signal: Remove kernel interal si_code magic struct siginfo is a union and the kernel since 2.4 has been hiding a union tag in the high 16bits of si_code using the values: __SI_KILL __SI_TIMER __SI_POLL __SI_FAULT __SI_CHLD __SI_RT __SI_MESGQ __SI_SYS While this looks plausible on the surface, in practice this situation has not worked well. - Injected positive signals are not copied to user space properly unless they have these magic high bits set. - Injected positive signals are not reported properly by signalfd unless they have these magic high bits set. - These kernel internal values leaked to userspace via ptrace_peek_siginfo - It was possible to inject these kernel internal values and cause the the kernel to misbehave. - Kernel developers got confused and expected these kernel internal values in userspace in kernel self tests. - Kernel developers got confused and set si_code to __SI_FAULT which is SI_USER in userspace which causes userspace to think an ordinary user sent the signal and that it was not kernel generated. - The values make it impossible to reorganize the code to transform siginfo_copy_to_user into a plain copy_to_user. As si_code must be massaged before being passed to userspace. So remove these kernel internal si codes and make the kernel code simpler and more maintainable. To replace these kernel internal magic si_codes introduce the helper function siginfo_layout, that takes a signal number and an si_code and computes which union member of siginfo is being used. Have siginfo_layout return an enumeration so that gcc will have enough information to warn if a switch statement does not handle all of union members. A couple of architectures have a messed up ABI that defines signal specific duplications of SI_USER which causes more special cases in siginfo_layout than I would like. The good news is only problem architectures pay the cost. Update all of the code that used the previous magic __SI_ values to use the new SIL_ values and to call siginfo_layout to get those values. Escept where not all of the cases are handled remove the defaults in the switch statements so that if a new case is missed in the future the lack will show up at compile time. Modify the code that copies siginfo si_code to userspace to just copy the value and not cast si_code to a short first. The high bits are no longer used to hold a magic union member. Fixup the siginfo header files to stop including the __SI_ values in their constants and for the headers that were missing it to properly update the number of si_codes for each signal type. The fixes to copy_siginfo_from_user32 implementations has the interesting property that several of them perviously should never have worked as the __SI_ values they depended up where kernel internal. With that dependency gone those implementations should work much better. The idea of not passing the __SI_ values out to userspace and then not reinserting them has been tested with criu and criu worked without changes. Ref: 2.4.0-test1 Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 14 +++++ include/uapi/asm-generic/siginfo.h | 115 +++++++++++++++---------------------- 2 files changed, 60 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index c97cc20369c0..38564e3e54c7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -21,6 +21,20 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from); +enum siginfo_layout { + SIL_KILL, + SIL_TIMER, + SIL_POLL, + SIL_FAULT, + SIL_CHLD, + SIL_RT, +#ifdef __ARCH_SIGSYS + SIL_SYS, +#endif +}; + +enum siginfo_layout siginfo_layout(int sig, int si_code); + /* * Define some primitives to manipulate sigset_t. */ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9e956ea94d57..e5aa6794cea4 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -151,29 +151,6 @@ typedef struct siginfo { #define si_arch _sifields._sigsys._arch #endif -#ifdef __KERNEL__ -#define __SI_MASK 0xffff0000u -#define __SI_KILL (0 << 16) -#define __SI_TIMER (1 << 16) -#define __SI_POLL (2 << 16) -#define __SI_FAULT (3 << 16) -#define __SI_CHLD (4 << 16) -#define __SI_RT (5 << 16) -#define __SI_MESGQ (6 << 16) -#define __SI_SYS (7 << 16) -#define __SI_CODE(T,N) ((T) | ((N) & 0xffff)) -#else /* __KERNEL__ */ -#define __SI_KILL 0 -#define __SI_TIMER 0 -#define __SI_POLL 0 -#define __SI_FAULT 0 -#define __SI_CHLD 0 -#define __SI_RT 0 -#define __SI_MESGQ 0 -#define __SI_SYS 0 -#define __SI_CODE(T,N) (N) -#endif /* __KERNEL__ */ - /* * si_code values * Digital reserves positive values for kernel-generated signals. @@ -181,10 +158,10 @@ typedef struct siginfo { #define SI_USER 0 /* sent by kill, sigsend, raise */ #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ #define SI_QUEUE -1 /* sent by sigqueue */ -#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ -#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ +#define SI_TIMER -2 /* sent by timer expiration */ +#define SI_MESGQ -3 /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -194,86 +171,86 @@ typedef struct siginfo { /* * SIGILL si_codes */ -#define ILL_ILLOPC (__SI_FAULT|1) /* illegal opcode */ -#define ILL_ILLOPN (__SI_FAULT|2) /* illegal operand */ -#define ILL_ILLADR (__SI_FAULT|3) /* illegal addressing mode */ -#define ILL_ILLTRP (__SI_FAULT|4) /* illegal trap */ -#define ILL_PRVOPC (__SI_FAULT|5) /* privileged opcode */ -#define ILL_PRVREG (__SI_FAULT|6) /* privileged register */ -#define ILL_COPROC (__SI_FAULT|7) /* coprocessor error */ -#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ +#define ILL_ILLOPC 1 /* illegal opcode */ +#define ILL_ILLOPN 2 /* illegal operand */ +#define ILL_ILLADR 3 /* illegal addressing mode */ +#define ILL_ILLTRP 4 /* illegal trap */ +#define ILL_PRVOPC 5 /* privileged opcode */ +#define ILL_PRVREG 6 /* privileged register */ +#define ILL_COPROC 7 /* coprocessor error */ +#define ILL_BADSTK 8 /* internal stack error */ #define NSIGILL 8 /* * SIGFPE si_codes */ -#define FPE_INTDIV (__SI_FAULT|1) /* integer divide by zero */ -#define FPE_INTOVF (__SI_FAULT|2) /* integer overflow */ -#define FPE_FLTDIV (__SI_FAULT|3) /* floating point divide by zero */ -#define FPE_FLTOVF (__SI_FAULT|4) /* floating point overflow */ -#define FPE_FLTUND (__SI_FAULT|5) /* floating point underflow */ -#define FPE_FLTRES (__SI_FAULT|6) /* floating point inexact result */ -#define FPE_FLTINV (__SI_FAULT|7) /* floating point invalid operation */ -#define FPE_FLTSUB (__SI_FAULT|8) /* subscript out of range */ +#define FPE_INTDIV 1 /* integer divide by zero */ +#define FPE_INTOVF 2 /* integer overflow */ +#define FPE_FLTDIV 3 /* floating point divide by zero */ +#define FPE_FLTOVF 4 /* floating point overflow */ +#define FPE_FLTUND 5 /* floating point underflow */ +#define FPE_FLTRES 6 /* floating point inexact result */ +#define FPE_FLTINV 7 /* floating point invalid operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ #define NSIGFPE 8 /* * SIGSEGV si_codes */ -#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ -#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) /* failed protection key checks */ +#define SEGV_MAPERR 1 /* address not mapped to object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 /* failed protection key checks */ #define NSIGSEGV 4 /* * SIGBUS si_codes */ -#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ -#define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ -#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ +#define BUS_ADRALN 1 /* invalid address alignment */ +#define BUS_ADRERR 2 /* non-existent physical address */ +#define BUS_OBJERR 3 /* object specific hardware error */ /* hardware memory error consumed on a machine check: action required */ -#define BUS_MCEERR_AR (__SI_FAULT|4) +#define BUS_MCEERR_AR 4 /* hardware memory error detected in process but not consumed: action optional*/ -#define BUS_MCEERR_AO (__SI_FAULT|5) +#define BUS_MCEERR_AO 5 #define NSIGBUS 5 /* * SIGTRAP si_codes */ -#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */ -#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ -#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ -#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ +#define TRAP_BRKPT 1 /* process breakpoint */ +#define TRAP_TRACE 2 /* process trace trap */ +#define TRAP_BRANCH 3 /* process taken branch trap */ +#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */ #define NSIGTRAP 4 /* * SIGCHLD si_codes */ -#define CLD_EXITED (__SI_CHLD|1) /* child has exited */ -#define CLD_KILLED (__SI_CHLD|2) /* child was killed */ -#define CLD_DUMPED (__SI_CHLD|3) /* child terminated abnormally */ -#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */ -#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */ -#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */ +#define CLD_EXITED 1 /* child has exited */ +#define CLD_KILLED 2 /* child was killed */ +#define CLD_DUMPED 3 /* child terminated abnormally */ +#define CLD_TRAPPED 4 /* traced child has trapped */ +#define CLD_STOPPED 5 /* child has stopped */ +#define CLD_CONTINUED 6 /* stopped child has continued */ #define NSIGCHLD 6 /* * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ -#define POLL_IN (__SI_POLL|1) /* data input available */ -#define POLL_OUT (__SI_POLL|2) /* output buffers available */ -#define POLL_MSG (__SI_POLL|3) /* input message available */ -#define POLL_ERR (__SI_POLL|4) /* i/o error */ -#define POLL_PRI (__SI_POLL|5) /* high priority input available */ -#define POLL_HUP (__SI_POLL|6) /* device disconnected */ +#define POLL_IN 1 /* data input available */ +#define POLL_OUT 2 /* output buffers available */ +#define POLL_MSG 3 /* input message available */ +#define POLL_ERR 4 /* i/o error */ +#define POLL_PRI 5 /* high priority input available */ +#define POLL_HUP 6 /* device disconnected */ #define NSIGPOLL 6 /* * SIGSYS si_codes */ -#define SYS_SECCOMP (__SI_SYS|1) /* seccomp triggered */ -#define NSIGSYS 1 +#define SYS_SECCOMP 1 /* seccomp triggered */ +#define NSIGSYS 1 /* * sigevent definitions -- cgit From 6c9a58e84e59a2fc937798b623f72ae4b436547d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 18 Jul 2017 18:04:17 +0300 Subject: ACPI / boot: Correct address space of __acpi_map_table() Sparse complains about wrong address space used in __acpi_map_table() and in __acpi_unmap_table(). arch/x86/kernel/acpi/boot.c:127:29: warning: incorrect type in return expression (different address spaces) arch/x86/kernel/acpi/boot.c:127:29: expected char * arch/x86/kernel/acpi/boot.c:127:29: got void [noderef] * arch/x86/kernel/acpi/boot.c:135:23: warning: incorrect type in argument 1 (different address spaces) arch/x86/kernel/acpi/boot.c:135:23: expected void [noderef] *addr arch/x86/kernel/acpi/boot.c:135:23: got char *map Correct address space to be in align of type of returned and passed parameter. Reviewed-by: Hanjun Guo Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..7443af1d16e7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -228,8 +228,8 @@ struct acpi_subtable_proc { int count; }; -char * __acpi_map_table (unsigned long phys_addr, unsigned long size); -void __acpi_unmap_table(char *map, unsigned long size); +void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); +void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); void acpi_boot_table_init (void); -- cgit From d764a122cc7af7ab1c40c08745f0fcd33cc2f7db Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 21 Jul 2017 12:49:28 +0200 Subject: net: add new netdevice feature for offload of RX port for UDP tunnels This adds a new netdevice feature, so that the offloading of RX port for UDP tunnels can be disabled by the administrator on some netdevices, using the "rx-udp_tunnel-port-offload" feature in ethtool. This feature is set for all devices that provide ndo_udp_tunnel_add. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index ebd273627334..dc8b4896b77b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -75,6 +75,7 @@ enum { NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ + NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */ /* * Add your fresh new feature above and remember to update @@ -138,6 +139,7 @@ enum { #define NETIF_F_HW_TC __NETIF_F(HW_TC) #define NETIF_F_HW_ESP __NETIF_F(HW_ESP) #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) +#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) -- cgit From 296d8ee37c50f139d934bdefbab85509b2e4a525 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 21 Jul 2017 12:49:30 +0200 Subject: net: add infrastructure to un-offload UDP tunnel port This adds a new NETDEV_UDP_TUNNEL_DROP_INFO event, similar to NETDEV_UDP_TUNNEL_PUSH_INFO, to signal to un-offload ports. This also adds udp_tunnel_drop_rx_port(), which calls ndo_udp_tunnel_del. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/udp_tunnel.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614642eb7eb7..3a3cdc1b1f31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2317,6 +2317,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D #define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 02c5be037451..10cce0dd4450 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -115,6 +115,8 @@ struct udp_tunnel_info { /* Notify network devices of offloadable types */ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type); void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type); void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type); @@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev) call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev); } +static inline void udp_tunnel_drop_rx_info(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev); +} + /* Transmit the skb using UDP encapsulation. */ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, -- cgit From afece3ab9a3640f9c1e064f56c8cdd4d783f6144 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Fri, 14 Jul 2017 13:10:15 -0400 Subject: PM / Domains: Add time accounting to various genpd states This patch adds support to calculate the time spent by the generic power domains in on and various idle states. Signed-off-by: Thara Gopinath Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 41004d97cefa..84f423d5633e 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -43,6 +43,7 @@ struct genpd_power_state { s64 power_on_latency_ns; s64 residency_ns; struct fwnode_handle *fwnode; + ktime_t idle_time; }; struct genpd_lock_ops; @@ -78,6 +79,8 @@ struct generic_pm_domain { unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ void *free; /* Free the state that was allocated for default */ + ktime_t on_time; + ktime_t accounting_time; const struct genpd_lock_ops *lock_ops; union { struct mutex mlock; -- cgit From 786f41fb6b008ea4341355b99083a38853a5311d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 Jul 2017 02:09:22 +0200 Subject: PM / core: Split dpm_suspend_noirq() and dpm_resume_noirq() Put the device interrupts disabling and enabling as well as cpuidle_pause() and cpuidle_resume() called during the "noirq" stages of system suspend into separate functions to allow the core suspend-to-idle code to be optimized (later). The only functional difference this makes is that debug facilities and diagnostic tools will not include the above operations into the "noirq" device suspend/resume duration measurements. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pm.h b/include/linux/pm.h index b8b4df09fd8f..47ded8aa8a5d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -689,6 +689,8 @@ struct dev_pm_domain { extern void device_pm_lock(void); extern void dpm_resume_start(pm_message_t state); extern void dpm_resume_end(pm_message_t state); +extern void dpm_noirq_resume_devices(pm_message_t state); +extern void dpm_noirq_end(void); extern void dpm_resume_noirq(pm_message_t state); extern void dpm_resume_early(pm_message_t state); extern void dpm_resume(pm_message_t state); @@ -697,6 +699,8 @@ extern void dpm_complete(pm_message_t state); extern void device_pm_unlock(void); extern int dpm_suspend_end(pm_message_t state); extern int dpm_suspend_start(pm_message_t state); +extern void dpm_noirq_begin(void); +extern int dpm_noirq_suspend_devices(pm_message_t state); extern int dpm_suspend_noirq(pm_message_t state); extern int dpm_suspend_late(pm_message_t state); extern int dpm_suspend(pm_message_t state); -- cgit From cb1844c47279fb59129f8a021a0b09bcf2011ad7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:26 +0800 Subject: sctp: remove the typedef sctp_initack_chunk_t This patch is to remove the typedef sctp_initack_chunk_t, and replace with struct sctp_initack_chunk in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 913474dfc96c..05c2099be537 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -336,7 +336,10 @@ struct sctp_hmac_algo_param { * The INIT ACK chunk is used to acknowledge the initiation of an SCTP * association. */ -typedef struct sctp_init_chunk sctp_initack_chunk_t; +struct sctp_initack_chunk { + struct sctp_chunkhdr chunk_hdr; + struct sctp_inithdr init_hdr; +}; /* Section 3.3.3.1 State Cookie (7) */ typedef struct sctp_cookie_param { -- cgit From f48ef4c7f7979e8e658b7e038a82f096ab292d70 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:27 +0800 Subject: sctp: remove the typedef sctp_cookie_param_t This patch is to remove the typedef sctp_cookie_param_t, and replace with struct sctp_cookie_param in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 05c2099be537..9e77abda2111 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -342,10 +342,10 @@ struct sctp_initack_chunk { }; /* Section 3.3.3.1 State Cookie (7) */ -typedef struct sctp_cookie_param { +struct sctp_cookie_param { struct sctp_paramhdr p; __u8 body[0]; -} sctp_cookie_param_t; +}; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { -- cgit From 62e6b7e4ee244f8043c169c049d3b2c2c798cd60 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:28 +0800 Subject: sctp: remove the typedef sctp_unrecognized_param_t This patch is to remove the typedef sctp_unrecognized_param_t, and replace with struct sctp_unrecognized_param in the places where it's using this typedef. It is also to fix some indents in sctp_sf_do_unexpected_init() and sctp_sf_do_5_1B_init(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 9e77abda2111..c323b3e3ecdb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -348,10 +348,10 @@ struct sctp_cookie_param { }; /* Section 3.3.3.1 Unrecognized Parameters (8) */ -typedef struct sctp_unrecognized_param { +struct sctp_unrecognized_param { struct sctp_paramhdr param_hdr; struct sctp_paramhdr unrecognized; -} sctp_unrecognized_param_t; +}; -- cgit From fe9a0fe7210d803adb3d5817da029fe39b8a4133 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:29 +0800 Subject: sctp: remove the typedef sctp_gap_ack_block_t This patch is to remove the typedef sctp_gap_ack_block_t, and replace with struct sctp_gap_ack_block in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c323b3e3ecdb..b84b8e8340da 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -363,16 +363,16 @@ struct sctp_unrecognized_param { * subsequences of DATA chunks as represented by their TSNs. */ -typedef struct sctp_gap_ack_block { +struct sctp_gap_ack_block { __be16 start; __be16 end; -} sctp_gap_ack_block_t; +}; typedef __be32 sctp_dup_tsn_t; typedef union { - sctp_gap_ack_block_t gab; - sctp_dup_tsn_t dup; + struct sctp_gap_ack_block gab; + sctp_dup_tsn_t dup; } sctp_sack_variable_t; typedef struct sctp_sackhdr { -- cgit From 9b41515636563ae76e730dbcb97fd303b94ed7d9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:30 +0800 Subject: sctp: remove the typedef sctp_dup_tsn_t This patch is to remove the typedef sctp_dup_tsn_t, and replace with __be32 in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b84b8e8340da..8faf74eff63d 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -368,11 +368,9 @@ struct sctp_gap_ack_block { __be16 end; }; -typedef __be32 sctp_dup_tsn_t; - typedef union { struct sctp_gap_ack_block gab; - sctp_dup_tsn_t dup; + __be32 dup; } sctp_sack_variable_t; typedef struct sctp_sackhdr { -- cgit From afd93b7be6e24731d82d9fd84b8a5ea73a68214b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:31 +0800 Subject: sctp: remove the typedef sctp_sack_variable_t This patch is to remove the typedef sctp_sack_variable_t, and replace with union sctp_sack_variable in the places where it's using this typedef. It is also to fix some indents in sctp_acked(). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8faf74eff63d..8df6ac53f05b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -368,17 +368,17 @@ struct sctp_gap_ack_block { __be16 end; }; -typedef union { +union sctp_sack_variable { struct sctp_gap_ack_block gab; __be32 dup; -} sctp_sack_variable_t; +}; typedef struct sctp_sackhdr { __be32 cum_tsn_ack; __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - sctp_sack_variable_t variable[0]; + union sctp_sack_variable variable[0]; } sctp_sackhdr_t; typedef struct sctp_sack_chunk { -- cgit From 787310859d8d1a72545db2343fb3ac8f765b0f35 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:32 +0800 Subject: sctp: remove the typedef sctp_sackhdr_t This patch is to remove the typedef sctp_sackhdr_t, and replace with struct sctp_sackhdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- include/net/sctp/command.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8df6ac53f05b..a2e43129d11a 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -373,17 +373,17 @@ union sctp_sack_variable { __be32 dup; }; -typedef struct sctp_sackhdr { +struct sctp_sackhdr { __be32 cum_tsn_ack; __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; union sctp_sack_variable variable[0]; -} sctp_sackhdr_t; +}; typedef struct sctp_sack_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_sackhdr_t sack_hdr; + struct sctp_sackhdr sack_hdr; } sctp_sack_chunk_t; diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index d4679e7a5ed5..1d5f6ff3f440 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -135,7 +135,7 @@ typedef union { struct sctp_init_chunk *init; struct sctp_ulpevent *ulpevent; struct sctp_packet *packet; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; } sctp_arg_t; @@ -176,7 +176,7 @@ SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp) SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init) SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent) SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) -SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) +SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) static inline sctp_arg_t SCTP_FORCE(void) -- cgit From d4d6c61489e7e4a8944360312e572988889558a8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:33 +0800 Subject: sctp: remove the typedef sctp_sack_chunk_t This patch is to remove the typedef sctp_sack_chunk_t, and replace with struct sctp_sack_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a2e43129d11a..48f6560dd880 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -381,10 +381,10 @@ struct sctp_sackhdr { union sctp_sack_variable variable[0]; }; -typedef struct sctp_sack_chunk { +struct sctp_sack_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_sackhdr sack_hdr; -} sctp_sack_chunk_t; +}; /* RFC 2960. Section 3.3.5 Heartbeat Request (HEARTBEAT) (4): -- cgit From 4d2dcdf4e04c938b266f06f271000e4b0f3a288f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:34 +0800 Subject: sctp: remove the typedef sctp_heartbeathdr_t This patch is to remove the typedef sctp_heartbeathdr_t, and replace with struct sctp_heartbeathdr in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 48f6560dd880..6e26b86770f1 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -394,13 +394,13 @@ struct sctp_sack_chunk { * the present association. */ -typedef struct sctp_heartbeathdr { +struct sctp_heartbeathdr { struct sctp_paramhdr info; -} sctp_heartbeathdr_t; +}; typedef struct sctp_heartbeat_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_heartbeathdr_t hb_hdr; + struct sctp_heartbeathdr hb_hdr; } sctp_heartbeat_chunk_t; -- cgit From 38c00f7482281801d6f7fe410c7a4b61ae25218e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:35 +0800 Subject: sctp: remove the typedef sctp_heartbeat_chunk_t This patch is to remove the typedef sctp_heartbeat_chunk_t, and replace with struct sctp_heartbeat_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 6e26b86770f1..bfda7c66960c 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -398,10 +398,10 @@ struct sctp_heartbeathdr { struct sctp_paramhdr info; }; -typedef struct sctp_heartbeat_chunk { +struct sctp_heartbeat_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_heartbeathdr hb_hdr; -} sctp_heartbeat_chunk_t; +}; /* For the abort and shutdown ACK we must carry the init tag in the -- cgit From 441ae65ae000dd325a609306bd0cd850df50cfc4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 23 Jul 2017 09:34:36 +0800 Subject: sctp: remove the typedef sctp_abort_chunk_t This patch is to remove the typedef sctp_abort_chunk_t, and replace with struct sctp_abort_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index bfda7c66960c..ba007163acfd 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -408,9 +408,9 @@ struct sctp_heartbeat_chunk { * common header. Just the common header is all that is needed with a * chunk descriptor. */ -typedef struct sctp_abort_chunk { +struct sctp_abort_chunk { struct sctp_chunkhdr uh; -} sctp_abort_chunk_t; +}; /* For the graceful shutdown we must carry the tag (in common header) -- cgit From 4e3274705324c695f56b3e1c194cd99b76eeea0f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Jun 2017 16:02:24 -0700 Subject: init_task: Remove redundant INIT_TASK_RCU_TREE_PREEMPT() macro Back in the dim distant past, the task_struct structure's RCU-related fields optionally included those needed for CONFIG_RCU_BOOST, even in CONFIG_PREEMPT_RCU builds. The INIT_TASK_RCU_TREE_PREEMPT() macro was used to provide initializers for those optional CONFIG_RCU_BOOST fields. However, the CONFIG_RCU_BOOST fields are now included unconditionally in CONFIG_PREEMPT_RCU builds, so there is no longer any need fro the INIT_TASK_RCU_TREE_PREEMPT() macro. This commit therefore removes it in favor of initializing the ->rcu_blocked_node field directly in the INIT_TASK_RCU_PREEMPT() macro. Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a2f6707e9fc0..0e849715e5be 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -125,18 +125,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_PREEMPT_RCU -#define INIT_TASK_RCU_TREE_PREEMPT() \ - .rcu_blocked_node = NULL, -#else -#define INIT_TASK_RCU_TREE_PREEMPT(tsk) -#endif #ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special.s = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - INIT_TASK_RCU_TREE_PREEMPT() + .rcu_blocked_node = NULL, #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif -- cgit From 825c5bd2fd47d30148db15fc121216c483682b01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 May 2017 16:16:40 -0700 Subject: srcu: Move rcu_scheduler_starting() from Tiny RCU to Tiny SRCU Other than lockdep support, Tiny RCU has no need for the scheduler status. However, Tiny SRCU will need this to control boot-time behavior independent of lockdep. Therefore, this commit moves rcu_scheduler_starting() from kernel/rcu/tiny_plugin.h to kernel/rcu/srcutiny.c. This in turn allows the complete removal of kernel/rcu/tiny_plugin.h. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 1 + include/linux/rcutiny.h | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f816fc72b51e..55c29e1650b9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -105,6 +105,7 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); +extern int rcu_scheduler_active __read_mostly; void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5becbbccb998..b3dbf9502fd0 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -116,13 +116,11 @@ static inline void rcu_irq_exit_irqson(void) { } static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void exit_rcu(void) { } - -#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) -extern int rcu_scheduler_active __read_mostly; +#ifdef CONFIG_SRCU void rcu_scheduler_starting(void); -#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ +#else /* #ifndef CONFIG_SRCU */ static inline void rcu_scheduler_starting(void) { } -#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ +#endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } -- cgit From 0d8a1e831e21d955af68f4ae5658b58b7ec25557 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Jun 2017 17:06:38 -0700 Subject: srcu: Make process_srcu() be static The function process_srcu() is not invoked outside of srcutree.c, so this commit makes it static and drops the EXPORT_SYMBOL_GPL(). Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 42973f787e7e..a026a9493bde 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -104,8 +104,6 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -void process_srcu(struct work_struct *work); - #define __SRCU_STRUCT_INIT(name) \ { \ .sda = &name##_srcu_data, \ -- cgit From 115a1a5285664f1931c30457081b4ae1e648f1f9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 May 2017 13:31:03 -0700 Subject: rcutorture: Move SRCU status printing to SRCU implementations This commit gets rid of some ugly #ifdefs in rcutorture.c by moving the SRCU status printing to the SRCU implementations. Signed-off-by: Paul E. McKenney --- include/linux/srcutiny.h | 13 +++++++++++++ include/linux/srcutree.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index cfbfc540cafc..261471f407a5 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -87,4 +87,17 @@ static inline void srcu_barrier(struct srcu_struct *sp) synchronize_srcu(sp); } +/* Defined here to avoid size increase for non-torture kernels. */ +static inline void srcu_torture_stats_print(struct srcu_struct *sp, + char *tt, char *tf) +{ + int idx; + + idx = READ_ONCE(sp->srcu_idx) & 0x1; + pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", + tt, tf, idx, + READ_ONCE(sp->srcu_lock_nesting[!idx]), + READ_ONCE(sp->srcu_lock_nesting[idx])); +} + #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 42973f787e7e..7886356bdc84 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -141,5 +141,6 @@ void process_srcu(struct work_struct *work); void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); +void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf); #endif -- cgit From b3c983142d4584c9d506b1ed31b65f4292b4aea8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 6 Jun 2017 16:39:00 -0700 Subject: rcutorture: Place event-traced strings into trace buffer Strings used in event tracing need to be specially handled, for example, being copied to the trace buffer instead of being pointed to by the trace buffer. Although the TPS() macro can be used to "launder" pointed-to strings, this might not be all that effective within a loadable module. This commit therefore copies rcutorture's strings to the trace buffer. Signed-off-by: Paul E. McKenney Cc: Steven Rostedt --- include/trace/events/rcu.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 91dc089d65b7..e91ae1f2290d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -703,6 +703,7 @@ TRACE_EVENT(rcu_batch_end, * at the beginning and end of the read, respectively. Note that the * callback address can be NULL. */ +#define RCUTORTURENAME_LEN 8 TRACE_EVENT(rcu_torture_read, TP_PROTO(const char *rcutorturename, struct rcu_head *rhp, @@ -711,7 +712,7 @@ TRACE_EVENT(rcu_torture_read, TP_ARGS(rcutorturename, rhp, secs, c_old, c), TP_STRUCT__entry( - __field(const char *, rcutorturename) + __field(char, rcutorturename[RCUTORTURENAME_LEN]) __field(struct rcu_head *, rhp) __field(unsigned long, secs) __field(unsigned long, c_old) @@ -719,7 +720,9 @@ TRACE_EVENT(rcu_torture_read, ), TP_fast_assign( - __entry->rcutorturename = rcutorturename; + strncpy(__entry->rcutorturename, rcutorturename, + RCUTORTURENAME_LEN); + __entry->rcutorturename[RCUTORTURENAME_LEN - 1] = 0; __entry->rhp = rhp; __entry->secs = secs; __entry->c_old = c_old; -- cgit From e42e24c3cc072088756d84ef07b492ac2a3ae2e5 Mon Sep 17 00:00:00 2001 From: Matvejchikov Ilya Date: Mon, 24 Jul 2017 16:02:12 +0400 Subject: tcp: remove redundant argument from tcp_rcv_established() The last (4th) argument of tcp_rcv_established() is redundant as it always equals to skb->len and the skb itself is always passed as 2th agrument. There is no reason to have it. Signed-off-by: Ilya V. Matveychikov Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 4f056ea79df2..12d68335acd4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -362,7 +362,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); + const struct tcphdr *th); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); -- cgit From aa5d1b81500e6059190f18fe25a7617682321910 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Jul 2017 11:35:48 -0700 Subject: x86/asm: Add ASM_UNREACHABLE This creates an unreachable annotation in asm for CONFIG_STACK_VALIDATION=y. While here, adjust earlier uses of \t\n into \n\t. Suggested-by: Josh Poimboeuf Signed-off-by: Kees Cook Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: David S. Miller Cc: Davidlohr Bueso Cc: Elena Reshetova Cc: Eric Biggers Cc: Eric W. Biederman Cc: Greg KH Cc: Hans Liljestrand Cc: James Bottomley Cc: Jann Horn Cc: Linus Torvalds Cc: Manfred Spraul Cc: Peter Zijlstra Cc: Rik van Riel Cc: Serge E. Hallyn Cc: Thomas Gleixner Cc: arozansk@redhat.com Cc: axboe@kernel.dk Cc: kernel-hardening@lists.openwall.com Cc: linux-arch Link: http://lkml.kernel.org/r/1500921349-10803-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 13 +++++++++---- include/linux/compiler.h | 3 +++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cd4bbe8242bd..179375b2d862 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -203,11 +203,16 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ - asm("%c0:\t\n" \ - ".pushsection .discard.unreachable\t\n" \ - ".long %c0b - .\t\n" \ - ".popsection\t\n" : : "i" (__LINE__)); \ + asm("%c0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__LINE__)); \ }) +#define ASM_UNREACHABLE \ + "999:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" #else #define annotate_unreachable() #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 219f82f3ec1a..641f5912d75f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -185,6 +185,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* Unreachable code */ +#ifndef ASM_UNREACHABLE +# define ASM_UNREACHABLE +#endif #ifndef unreachable # define unreachable() do { } while (1) #endif -- cgit From 885dcd709ba9120b9935415b8b0f9d1b94e5826b Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 19 Jul 2017 03:06:34 +0530 Subject: powerpc/perf: Add nest IMC PMU support Add support to register Nest In-Memory Collection PMU counters. Patch adds a new device file called "imc-pmu.c" under powerpc/perf folder to contain all the device PMU functions. Device tree parser code added to parse the PMU events information and create sysfs event attributes for the PMU. Cpumask attribute added along with Cpu hotplug online/offline functions specific for nest PMU. A new state "CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE" added for the cpu hotplug callbacks. Error handle path frees the memory and unregisters the CPU hotplug callbacks. Signed-off-by: Anju T Sudhakar Signed-off-by: Hemant Kumar Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b56573bf440d..0853a14b1fa1 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -139,6 +139,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, + CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit From 39a846db1d574a498511ffccd75223a35cdcb059 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 19 Jul 2017 03:06:35 +0530 Subject: powerpc/perf: Add core IMC PMU support Add support to register Core In-Memory Collection PMU counters. Patch adds core IMC specific data structures, along with memory init functions and CPU hotplug support. Signed-off-by: Anju T Sudhakar Signed-off-by: Hemant Kumar Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0853a14b1fa1..1be505db0090 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -140,6 +140,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, + CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit From f74c89bd80fb3f1328fdf4a44eeba793cdce4222 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 19 Jul 2017 03:06:36 +0530 Subject: powerpc/perf: Add thread IMC PMU support Add support to register Thread In-Memory Collection PMU counters. Patch adds thread IMC specific data structures, along with memory init functions and CPU hotplug support. Signed-off-by: Anju T Sudhakar Signed-off-by: Hemant Kumar Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 1be505db0090..1bc7dcfbf7b3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -141,6 +141,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, + CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit From 67bd22c09ac1b54b5f42445783d2dc8e011f7a71 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 19 Jul 2017 12:04:07 -0500 Subject: power: supply: bq27xxx: move platform driver code into bq27xxx_battery_hdq.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the BQ27xxx driver was originally written the w1 subsystem only allowed device drivers for w1 attached devices to live in the w1 subsystem. Kernel driver subsystems expect that the driver for a device live in the directory of the subsystem for which it implements functionality, not in the directory of the bus that it is attached. To work around this, the BQ27xxx driver was implemented as a platform device driver and the interface driver would instantiate this device from within the w1 directory, then pass a w1 read callback as platform data. As we can now have the w1 interface driver in the power/supply directory (like we do already with the i2c interface driver) we can remove this middle-layer platform driver. Signed-off-by: Andrew F. Davis Acked-by: Pali Rohár Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 11e11685dd1d..89890437f9ab 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -20,23 +20,6 @@ enum bq27xxx_chip { BQ27421, /* bq27421, bq27425, bq27441, bq27621 */ }; -/** - * struct bq27xxx_plaform_data - Platform data for bq27xxx devices - * @name: Name of the battery. - * @chip: Chip class number of this device. - * @read: HDQ read callback. - * This function should provide access to the HDQ bus the battery is - * connected to. - * The first parameter is a pointer to the battery device, the second the - * register to be read. The return value should either be the content of - * the passed register or an error value. - */ -struct bq27xxx_platform_data { - const char *name; - enum bq27xxx_chip chip; - int (*read)(struct device *dev, unsigned int); -}; - struct bq27xxx_device_info; struct bq27xxx_access_methods { int (*read)(struct bq27xxx_device_info *di, u8 reg, bool single); -- cgit From 1ee6f00d1164955b7bdadd36fc0f2736754784d9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 25 Jul 2017 09:44:51 -0500 Subject: x86/asm: Make objtool unreachable macros independent from GCC version The ASM_UNREACHABLE macro isn't GCC version-specific, so move it outside the GCC 4.5+ check. Otherwise the 0-day robot will report objtool warnings for uses of ASM_UNREACHABLE with GCC 4.4. Also move the annotate_unreachable() macro so the related macros can stay together. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: aa5d1b81500e ("x86/asm: Add ASM_UNREACHABLE") Link: http://lkml.kernel.org/r/fb18337dbf230fd36450d9faf19a2b2533dbcba1.1500993873.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 179375b2d862..17cbe3cffc0e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -128,6 +128,22 @@ #define __always_unused __attribute__((unused)) #define __mode(x) __attribute__((mode(x))) +#ifdef CONFIG_STACK_VALIDATION +#define annotate_unreachable() ({ \ + asm("%c0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__LINE__)); \ +}) +#define ASM_UNREACHABLE \ + "999:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" +#else +#define annotate_unreachable() +#endif + /* gcc version specific checks */ #if GCC_VERSION < 30200 @@ -201,22 +217,6 @@ #endif #endif -#ifdef CONFIG_STACK_VALIDATION -#define annotate_unreachable() ({ \ - asm("%c0:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__LINE__)); \ -}) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ - ".popsection\n\t" -#else -#define annotate_unreachable() -#endif - /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer -- cgit From 931ab4a5ce2d93cb434dc7fc067a702117453b13 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Jul 2017 09:59:08 -0700 Subject: atomics: Revert addition of comment header to spin_unlock_wait() There is still considerable confusion as to the semantics of spin_unlock_wait(), but there seems to be universal agreement that it is not that of a lock/unlock pair. This commit therefore removes the comment added by 6016ffc3874d ("atomics: Add header comment so spin_unlock_wait()") in order to prevent at least that flavor of confusion. Signed-off-by: Paul E. McKenney --- include/linux/spinlock.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d9510e8522d4..59248dcc6ef3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -369,26 +369,6 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) -/** - * spin_unlock_wait - Interpose between successive critical sections - * @lock: the spinlock whose critical sections are to be interposed. - * - * Semantically this is equivalent to a spin_lock() immediately - * followed by a spin_unlock(). However, most architectures have - * more efficient implementations in which the spin_unlock_wait() - * cannot block concurrent lock acquisition, and in some cases - * where spin_unlock_wait() does not write to the lock variable. - * Nevertheless, spin_unlock_wait() can have high overhead, so if - * you feel the need to use it, please check to see if there is - * a better way to get your job done. - * - * The ordering guarantees provided by spin_unlock_wait() are: - * - * 1. All accesses preceding the spin_unlock_wait() happen before - * any accesses in later critical sections for this same lock. - * 2. All accesses following the spin_unlock_wait() happen after - * any accesses in earlier critical sections for this same lock. - */ static __always_inline void spin_unlock_wait(spinlock_t *lock) { raw_spin_unlock_wait(&lock->rlock); -- cgit From a58163d8ca2c8d288ee9f95989712f98473a5ac2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Jun 2017 12:11:34 -0700 Subject: rcu: Migrate callbacks earlier in the CPU-offline timeline RCU callbacks must be migrated away from an outgoing CPU, and this is done near the end of the CPU-hotplug operation, after the outgoing CPU is long gone. Unfortunately, this means that other CPU-hotplug callbacks can execute while the outgoing CPU's callbacks are still immobilized on the long-gone CPU's callback lists. If any of these CPU-hotplug callbacks must wait, either directly or indirectly, for the invocation of any of the immobilized RCU callbacks, the system will hang. This commit avoids such hangs by migrating the callbacks away from the outgoing CPU immediately upon its departure, shortly after the return from __cpu_die() in takedown_cpu(). Thus, RCU is able to advance these callbacks and invoke them, which allows all the after-the-fact CPU-hotplug callbacks to wait on these RCU callbacks without risk of a hang. While in the neighborhood, this commit also moves rcu_send_cbs_to_orphanage() and rcu_adopt_orphan_cbs() under a pre-existing #ifdef to avoid including dead code on the one hand and to avoid define-without-use warnings on the other hand. Reported-by: Jeffrey Hugo Link: http://lkml.kernel.org/r/db9c91f6-1b17-6136-84f0-03c3c2581ab4@codeaurora.org Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Ingo Molnar Cc: Anna-Maria Gleixner Cc: Boris Ostrovsky Cc: Richard Weinberger --- include/linux/rcupdate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f816fc72b51e..cf307ebf345d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -110,6 +110,7 @@ void rcu_bh_qs(void); void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); void rcu_cpu_starting(unsigned int cpu); +void rcutree_migrate_callbacks(int cpu); #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); -- cgit From ed4676e254630c1f00a4fc8f3821890fff7e3643 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:46 +0530 Subject: cpufreq: Replace "max_transition_latency" with "dynamic_switching" There is no limitation in the ondemand or conservative governors which disallow the transition_latency to be greater than 10 ms. The max_transition_latency field is rather used to disallow automatic dynamic frequency switching for platforms which didn't wanted these governors to run. Replace max_transition_latency with a boolean (dynamic_switching) and check for transition_latency == CPUFREQ_ETERNAL along with that. This makes it pretty straight forward to read/understand now. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index aaadfc543f63..e141dbbb9d1c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -487,12 +487,8 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * polling frequency is 1000 times the transition latency of the processor. The * ondemand governor will work on any processor with transition latency <= 10ms, * using appropriate sampling rate. - * - * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) - * the ondemand governor will not work. All times here are in us (microseconds). */ #define LATENCY_MULTIPLIER (1000) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; @@ -505,9 +501,8 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - unsigned int max_transition_latency; /* HW must be able to switch to - next freq faster than this value in nano secs or we - will fallback to performance governor */ + /* For governors which change frequency dynamically by themselves */ + bool dynamic_switching; struct list_head governor_list; struct module *owner; }; -- cgit From fe829ed8ef1f3c7ac22843bd594ef2f6c4044288 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:48 +0530 Subject: cpufreq: Add CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING cpufreq driver flag The policy->transition_latency field is used for multiple purposes today and its not straight forward at all. This is how it is used: A. Set the correct transition_latency value. B. Set it to CPUFREQ_ETERNAL because: 1. We don't want automatic dynamic switching (with ondemand/conservative) to happen at all. 2. We don't know the transition latency. This patch handles the B.1. case in a more readable way. A new flag for the cpufreq drivers is added to disallow use of cpufreq governors which have dynamic_switching flag set. All the current cpufreq drivers which are setting transition_latency unconditionally to CPUFREQ_ETERNAL are updated to use it. They don't need to set transition_latency anymore. There shouldn't be any functional change after this patch. Signed-off-by: Viresh Kumar Reviewed-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index e141dbbb9d1c..5f40522ec98c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -370,6 +370,12 @@ struct cpufreq_driver { */ #define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +/* + * Set by drivers to disallow use of governors with "dynamic_switching" flag + * set. + */ +#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING (1 << 6) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit From ebae3e830a991116526646f09dd67f68f332b330 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 5 Jul 2017 20:27:53 +0300 Subject: iommu: Correct iommu_map / iommu_unmap prototypes Commit 7d3002cc8c16 ("iommu/core: split mapping to page sizes as supported by the hardware") replaced 'int gfp_order' with a 'size_t size' of iommu_map / iommu_unmap function arguments, but missed the function prototypes for the disabled CONFIG_IOMMU_API case, let's correct them for consistency. Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..f1ce8e517d8d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -425,13 +425,13 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, int gfp_order, int prot) + phys_addr_t paddr, size_t size, int prot) { return -ENODEV; } static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, - int gfp_order) + size_t size) { return -ENODEV; } -- cgit From f06e8c584fa0d05312c11ea66194f3d2efb93c21 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 22 Jun 2017 16:14:17 +0200 Subject: kasan: Allow kasan_check_read/write() to accept pointers to volatiles Currently kasan_check_read/write() accept 'const void*', make them accept 'const volatile void*'. This is required for instrumentation of atomic operations and there is just no reason to not allow that. Signed-off-by: Dmitry Vyukov Reviewed-by: Andrey Ryabinin Acked-by: Mark Rutland Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/33e5ec275c1ee89299245b2ebbccd63709c6021f.1498140838.git.dvyukov@google.com Signed-off-by: Ingo Molnar --- include/linux/kasan-checks.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index b7f8aced7870..41960fecf783 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h @@ -2,11 +2,13 @@ #define _LINUX_KASAN_CHECKS_H #ifdef CONFIG_KASAN -void kasan_check_read(const void *p, unsigned int size); -void kasan_check_write(const void *p, unsigned int size); +void kasan_check_read(const volatile void *p, unsigned int size); +void kasan_check_write(const volatile void *p, unsigned int size); #else -static inline void kasan_check_read(const void *p, unsigned int size) { } -static inline void kasan_check_write(const void *p, unsigned int size) { } +static inline void kasan_check_read(const volatile void *p, unsigned int size) +{ } +static inline void kasan_check_write(const volatile void *p, unsigned int size) +{ } #endif #endif -- cgit From ee9f8fce99640811b2b8e79d0d1dbe8bab69ba67 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Jul 2017 18:36:57 -0500 Subject: x86/unwind: Add the ORC unwinder Add the new ORC unwinder which is enabled by CONFIG_ORC_UNWINDER=y. It plugs into the existing x86 unwinder framework. It relies on objtool to generate the needed .orc_unwind and .orc_unwind_ip sections. For more details on why ORC is used instead of DWARF, see Documentation/x86/orc-unwinder.txt - but the short version is that it's a simplified, fundamentally more robust debugninfo data structure, which also allows up to two orders of magnitude faster lookups than the DWARF unwinder - which matters to profiling workloads like perf. Thanks to Andy Lutomirski for the performance improvement ideas: splitting the ORC unwind table into two parallel arrays and creating a fast lookup table to search a subset of the unwind table. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Slaby Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/0a6cbfb40f8da99b7a45a1a8302dc6aef16ec812.1500938583.git.jpoimboe@redhat.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..fffc9bdae025 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -680,6 +680,31 @@ #define BUG_TABLE #endif +#ifdef CONFIG_ORC_UNWINDER +#define ORC_UNWIND_TABLE \ + . = ALIGN(4); \ + .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_orc_unwind_ip) = .; \ + KEEP(*(.orc_unwind_ip)) \ + VMLINUX_SYMBOL(__stop_orc_unwind_ip) = .; \ + } \ + . = ALIGN(6); \ + .orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_orc_unwind) = .; \ + KEEP(*(.orc_unwind)) \ + VMLINUX_SYMBOL(__stop_orc_unwind) = .; \ + } \ + . = ALIGN(4); \ + .orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(orc_lookup) = .; \ + . += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) / \ + LOOKUP_BLOCK_SIZE) + 1) * 4; \ + VMLINUX_SYMBOL(orc_lookup_end) = .; \ + } +#else +#define ORC_UNWIND_TABLE +#endif + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ @@ -866,7 +891,7 @@ DATA_DATA \ CONSTRUCTORS \ } \ - BUG_TABLE + BUG_TABLE \ #define INIT_TEXT_SECTION(inittext_align) \ . = ALIGN(inittext_align); \ -- cgit From a7e1149b64091ac87dfb18b177abb63ba4cbbd3d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 26 Jul 2017 05:17:08 +0000 Subject: ASoC: remove cache_bypass from snd_soc_codec snd_soc_codec .cache_bypass related operation code has been removed. Let's remove remaining code. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9c94b97c17f8..10b71beb9f13 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -894,7 +894,6 @@ struct snd_soc_codec { struct list_head list; /* runtime */ - unsigned int cache_bypass:1; /* Suppress access to the cache */ unsigned int cache_init:1; /* codec cache has been initialized */ /* codec IO */ -- cgit From 85f7ff9702bcc5e899bd0bf6b6e383ecb2ac436a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 2 Jun 2017 05:30:02 -0400 Subject: media: v4l2-flash: Use led_classdev instead of led_classdev_flash for indicator The V4L2 flash class initialisation expects struct led_classdev_flash that describes an indicator but only uses struct led_classdev which is a field iled_cdev in the struct. Use struct iled_cdev only. Signed-off-by: Sakari Ailus Reviewed-by: Jacek Anaszewski Reviewed-by: Sebastian Reichel Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-flash-led-class.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-flash-led-class.h b/include/media/v4l2-flash-led-class.h index f9dcd54c1745..54e31a805a88 100644 --- a/include/media/v4l2-flash-led-class.h +++ b/include/media/v4l2-flash-led-class.h @@ -85,7 +85,7 @@ struct v4l2_flash_config { */ struct v4l2_flash { struct led_classdev_flash *fled_cdev; - struct led_classdev_flash *iled_cdev; + struct led_classdev *iled_cdev; const struct v4l2_flash_ops *ops; struct v4l2_subdev sd; @@ -124,7 +124,7 @@ static inline struct v4l2_flash *v4l2_ctrl_to_v4l2_flash(struct v4l2_ctrl *c) struct v4l2_flash *v4l2_flash_init( struct device *dev, struct fwnode_handle *fwn, struct led_classdev_flash *fled_cdev, - struct led_classdev_flash *iled_cdev, + struct led_classdev *iled_cdev, const struct v4l2_flash_ops *ops, struct v4l2_flash_config *config); @@ -140,7 +140,7 @@ void v4l2_flash_release(struct v4l2_flash *v4l2_flash); static inline struct v4l2_flash *v4l2_flash_init( struct device *dev, struct fwnode_handle *fwn, struct led_classdev_flash *fled_cdev, - struct led_classdev_flash *iled_cdev, + struct led_classdev *iled_cdev, const struct v4l2_flash_ops *ops, struct v4l2_flash_config *config) { -- cgit From d2f3c3849461baefdbb39123abde1054d46bf22e Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Mon, 24 Jul 2017 19:02:09 -0400 Subject: percpu: increase minimum percpu allocation size and align first regions This patch increases the minimum allocation size of percpu memory to 4-bytes. This change will help minimize the metadata overhead associated with the bitmap allocator. The assumption is that most allocations will be of objects or structs greater than 2 bytes with integers or longs being used rather than shorts. The first chunk regions are now aligned with the minimum allocation size. The reserved region is expected to be set as a multiple of the minimum allocation size. The static region is aligned up and the delta is removed from the dynamic size. This works because the dynamic size is increased to be page aligned. If the static size is not minimum allocation size aligned, then there must be a gap that is added to the dynamic size. The dynamic size will never be smaller than the set value. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 491b3f5a5f8a..90e0cb0f7802 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -21,6 +21,10 @@ /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) +/* minimum allocation size and shift in bytes */ +#define PCPU_MIN_ALLOC_SHIFT 2 +#define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT) + /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. -- cgit From 3acdfd280fe7d807237f2cb7a09d6f8f7f1b484f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Jul 2017 06:22:15 -0400 Subject: errseq: rename __errseq_set to errseq_set Nothing calls this wrapper anymore, so just remove it and rename the old function to get rid of the double underscore prefix. Signed-off-by: Jeff Layton --- include/linux/errseq.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/errseq.h b/include/linux/errseq.h index 9e0d444ac88d..784f0860527b 100644 --- a/include/linux/errseq.h +++ b/include/linux/errseq.h @@ -5,14 +5,7 @@ typedef u32 errseq_t; -errseq_t __errseq_set(errseq_t *eseq, int err); -static inline void errseq_set(errseq_t *eseq, int err) -{ - /* Optimize for the common case of no error */ - if (unlikely(err)) - __errseq_set(eseq, err); -} - +errseq_t errseq_set(errseq_t *eseq, int err); errseq_t errseq_sample(errseq_t *eseq); int errseq_check(errseq_t *eseq, errseq_t since); int errseq_check_and_advance(errseq_t *eseq, errseq_t *since); -- cgit From da48c948c263c9d87dfc64566b3373a858cc8aa2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2017 15:23:27 -0400 Subject: media: fix warning on v4l2_subdev_call() result interpreted as bool v4l2_subdev_call is a macro returning whatever the callback return type is, usually 'int'. With gcc-7 and ccache, this can lead to many wanings like: media/platform/pxa_camera.c: In function 'pxa_mbus_build_fmts_xlate': media/platform/pxa_camera.c:766:27: error: ?: using integer constants in boolean context [-Werror=int-in-bool-context] while (!v4l2_subdev_call(subdev, pad, enum_mbus_code, NULL, &code)) { media/atomisp/pci/atomisp2/atomisp_cmd.c: In function 'atomisp_s_ae_window': media/atomisp/pci/atomisp2/atomisp_cmd.c:6414:52: error: ?: using integer constants in boolean context [-Werror=int-in-bool-context] if (v4l2_subdev_call(isp->inputs[asd->input_curr].camera, The problem here is that after preprocessing, we the compiler sees a variation of if (a ? 0 : 2) that it thinks is suspicious. This replaces the ?: operator with an different expression that does the same thing in a more easily readable way that cannot tigger the warning Link: https://lkml.org/lkml/2017/7/14/156 Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 0f92ebd2d710..e83872078376 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -982,8 +982,16 @@ void v4l2_subdev_init(struct v4l2_subdev *sd, * Example: err = v4l2_subdev_call(sd, video, s_std, norm); */ #define v4l2_subdev_call(sd, o, f, args...) \ - (!(sd) ? -ENODEV : (((sd)->ops->o && (sd)->ops->o->f) ? \ - (sd)->ops->o->f((sd), ##args) : -ENOIOCTLCMD)) + ({ \ + int __result; \ + if (!(sd)) \ + __result = -ENODEV; \ + else if (!((sd)->ops->o && (sd)->ops->o->f)) \ + __result = -ENOIOCTLCMD; \ + else \ + __result = (sd)->ops->o->f((sd), ##args); \ + __result; \ + }) #define v4l2_subdev_has_op(sd, o, f) \ ((sd)->ops->o && (sd)->ops->o->f) -- cgit From 40064aeca35c5c14789e2adcf3a1d7e5d4bd65f2 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Wed, 12 Jul 2017 11:27:32 -0700 Subject: percpu: replace area map allocator with bitmap The percpu memory allocator is experiencing scalability issues when allocating and freeing large numbers of counters as in BPF. Additionally, there is a corner case where iteration is triggered over all chunks if the contig_hint is the right size, but wrong alignment. This patch replaces the area map allocator with a basic bitmap allocator implementation. Each subsequent patch will introduce new features and replace full scanning functions with faster non-scanning options when possible. Implementation: This patchset removes the area map allocator in favor of a bitmap allocator backed by metadata blocks. The primary goal is to provide consistency in performance and memory footprint with a focus on small allocations (< 64 bytes). The bitmap removes the heavy memmove from the freeing critical path and provides a consistent memory footprint. The metadata blocks provide a bound on the amount of scanning required by maintaining a set of hints. In an effort to make freeing fast, the metadata is updated on the free path if the new free area makes a page free, a block free, or spans across blocks. This causes the chunk's contig hint to potentially be smaller than what it could allocate by up to the smaller of a page or a block. If the chunk's contig hint is contained within a block, a check occurs and the hint is kept accurate. Metadata is always kept accurate on allocation, so there will not be a situation where a chunk has a later contig hint than available. Evaluation: I have primarily done testing against a simple workload of allocation of 1 million objects (2^20) of varying size. Deallocation was done by in order, alternating, and in reverse. These numbers were collected after rebasing ontop of a80099a152. I present the worst-case numbers here: Area Map Allocator: Object Size | Alloc Time (ms) | Free Time (ms) ---------------------------------------------- 4B | 310 | 4770 16B | 557 | 1325 64B | 436 | 273 256B | 776 | 131 1024B | 3280 | 122 Bitmap Allocator: Object Size | Alloc Time (ms) | Free Time (ms) ---------------------------------------------- 4B | 490 | 70 16B | 515 | 75 64B | 610 | 80 256B | 950 | 100 1024B | 3520 | 200 This data demonstrates the inability for the area map allocator to handle less than ideal situations. In the best case of reverse deallocation, the area map allocator was able to perform within range of the bitmap allocator. In the worst case situation, freeing took nearly 5 seconds for 1 million 4-byte objects. The bitmap allocator dramatically improves the consistency of the free path. The small allocations performed nearly identical regardless of the freeing pattern. While it does add to the allocation latency, the allocation scenario here is optimal for the area map allocator. The area map allocator runs into trouble when it is allocating in chunks where the latter half is full. It is difficult to replicate this, so I present a variant where the pages are second half filled. Freeing was done sequentially. Below are the numbers for this scenario: Area Map Allocator: Object Size | Alloc Time (ms) | Free Time (ms) ---------------------------------------------- 4B | 4118 | 4892 16B | 1651 | 1163 64B | 598 | 285 256B | 771 | 158 1024B | 3034 | 160 Bitmap Allocator: Object Size | Alloc Time (ms) | Free Time (ms) ---------------------------------------------- 4B | 481 | 67 16B | 506 | 69 64B | 636 | 75 256B | 892 | 90 1024B | 3262 | 147 The data shows a parabolic curve of performance for the area map allocator. This is due to the memmove operation being the dominant cost with the lower object sizes as more objects are packed in a chunk and at higher object sizes, the traversal of the chunk slots is the dominating cost. The bitmap allocator suffers this problem as well. The above data shows the inability to scale for the allocation path with the area map allocator and that the bitmap allocator demonstrates consistent performance in general. The second problem of additional scanning can result in the area map allocator completing in 52 minutes when trying to allocate 1 million 4-byte objects with 8-byte alignment. The same workload takes approximately 16 seconds to complete for the bitmap allocator. V2: Fixed a bug in pcpu_alloc_first_chunk end_offset was setting the bitmap using bytes instead of bits. Added a comment to pcpu_cnt_pop_pages to explain bitmap_weight. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Tejun Heo --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 90e0cb0f7802..b7e6c98722d1 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -120,7 +120,6 @@ extern bool is_kernel_percpu_address(unsigned long addr); #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) extern void __init setup_per_cpu_areas(void); #endif -extern void __init percpu_init_late(void); extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); extern void __percpu *__alloc_percpu(size_t size, size_t align); -- cgit From ca460b3c96274d79f84b31a3fea23a6eed479917 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Mon, 24 Jul 2017 19:02:12 -0400 Subject: percpu: introduce bitmap metadata blocks This patch introduces the bitmap metadata blocks and adds the skeleton of the code that will be used to maintain these blocks. Each chunk's bitmap is made up of full metadata blocks. These blocks maintain basic metadata to help prevent scanning unnecssarily to update hints. Full scanning methods are used for the skeleton and will be replaced in the coming patches. A number of helper functions are added as well to do conversion of pages to blocks and manage offsets. Comments will be updated as the final version of each function is added. There exists a relationship between PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE, the region size, and unit_size. Every chunk's region (including offsets) is page aligned at the beginning to preserve alignment. The end is aligned to LCM(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE) to ensure that the end can fit with the populated page map which is by page and every metadata block is fully accounted for. The unit_size is already page aligned, but must also be aligned with PCPU_BITMAP_BLOCK_SIZE to ensure full metadata blocks. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Tejun Heo --- include/linux/percpu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b7e6c98722d1..31795e619273 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -25,6 +25,18 @@ #define PCPU_MIN_ALLOC_SHIFT 2 #define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT) +/* + * This determines the size of each metadata block. There are several subtle + * constraints around this constant. The reserved region must be a multiple of + * PCPU_BITMAP_BLOCK_SIZE. Additionally, PCPU_BITMAP_BLOCK_SIZE must be a + * multiple of PAGE_SIZE or PAGE_SIZE must be a multiple of + * PCPU_BITMAP_BLOCK_SIZE to align with the populated page map. The unit_size + * also has to be a multiple of PCPU_BITMAP_BLOCK_SIZE to ensure full blocks. + */ +#define PCPU_BITMAP_BLOCK_SIZE PAGE_SIZE +#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \ + PCPU_MIN_ALLOC_SHIFT) + /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. -- cgit From b185cd0dc61c14875155e7bcc3f2c139b6feefd2 Mon Sep 17 00:00:00 2001 From: "Dennis Zhou (Facebook)" Date: Mon, 24 Jul 2017 19:02:17 -0400 Subject: percpu: update free path to take advantage of contig hints The bitmap allocator must keep metadata consistent. The easiest way is to scan after every allocation for each affected block and the entire chunk. This is rather expensive. The free path can take advantage of current contig hints to prevent scanning within the start and end block. If a scan is needed, it can be done by scanning backwards from the start and forwards from the end to identify the entire free area this can be combined with. The blocks can then be updated by some basic checks rather than complete block scans. A chunk scan happens when the freed area makes a page free, a block free, or spans across blocks. This is necessary as the contig hint at this point could span across blocks. The check uses the minimum of page size and the block size to allow for variable sized blocks. There is a tradeoff here with not updating after every free. It is possible a contig hint in one block can be merged with the contig hint in the next block. This means the contig hint can be off by up to a page. However, if the chunk's contig hint is contained in one block, the contig hint will be accurate. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Tejun Heo --- include/linux/percpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 31795e619273..6a5fb939d3e5 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -25,6 +25,9 @@ #define PCPU_MIN_ALLOC_SHIFT 2 #define PCPU_MIN_ALLOC_SIZE (1 << PCPU_MIN_ALLOC_SHIFT) +/* number of bits per page, used to trigger a scan if blocks are > PAGE_SIZE */ +#define PCPU_BITS_PER_PAGE (PAGE_SIZE >> PCPU_MIN_ALLOC_SHIFT) + /* * This determines the size of each metadata block. There are several subtle * constraints around this constant. The reserved region must be a multiple of -- cgit From 645874e5807ae5ffa09fba2ccd23f01e4eb16d58 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 26 Jul 2017 06:07:11 -0700 Subject: qed: Add support for Energy efficient ethernet. The patch adds required driver support for reading/configuring the Energy Efficient Ethernet (EEE) parameters. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ef39c7f40ae6..9f3276271b02 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -161,6 +161,18 @@ enum qed_nvm_images { QED_NVM_IMAGE_FCOE_CFG, }; +struct qed_link_eee_params { + u32 tx_lpi_timer; +#define QED_EEE_1G_ADV BIT(0) +#define QED_EEE_10G_ADV BIT(1) + + /* Capabilities are represented using QED_EEE_*_ADV values */ + u8 adv_caps; + u8 lp_adv_caps; + bool enable; + bool tx_lpi_enable; +}; + enum qed_led_mode { QED_LED_MODE_OFF, QED_LED_MODE_ON, @@ -408,6 +420,7 @@ struct qed_link_params { #define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) #define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) #define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4) +#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5) u32 override_flags; bool autoneg; u32 adv_speeds; @@ -422,6 +435,7 @@ struct qed_link_params { #define QED_LINK_LOOPBACK_EXT BIT(3) #define QED_LINK_LOOPBACK_MAC BIT(4) u32 loopback_mode; + struct qed_link_eee_params eee; }; struct qed_link_output { @@ -437,6 +451,12 @@ struct qed_link_output { u8 port; /* In PORT defs */ bool autoneg; u32 pause_config; + + /* EEE - capability & param */ + bool eee_supported; + bool eee_active; + u8 sup_caps; + struct qed_link_eee_params eee; }; struct qed_probe_params { -- cgit From 477f2d1460a636abd08f03eafabe0c51366fa5de Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:13 -0700 Subject: qed: Add support for vf coalesce configuration. This patch add the ethtool support to set RX/Tx coalesce value to the VF associated Rx/Tx queues. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 9f3276271b02..4d59ca16134c 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -694,8 +694,8 @@ struct qed_common_ops { * * @return 0 on success, error otherwise. */ - int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u16 qid, u16 sb_id); + int (*set_coalesce)(struct qed_dev *cdev, + u16 rx_coal, u16 tx_coal, void *handle); /** * @brief set_led - Configure LED mode -- cgit From bf5a94bfe26a9fcd4af91ae6bccd4f3d600d2262 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:14 -0700 Subject: qed: Read per queue coalesce from hardware Retrieve the actual coalesce value from hardware for every Rx/Tx queue, instead of Rx/Tx coalesce value cached during set coalesce. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 1 + include/linux/qed/qed_if.h | 11 +---------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 0eef0a2b1901..d60de4a39810 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -323,6 +323,7 @@ struct qed_eth_ops { int (*configure_arfs_searcher)(struct qed_dev *cdev, bool en_searcher); + int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4d59ca16134c..2b4720bb8b40 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -186,6 +186,7 @@ enum qed_led_mode { #define QED_COALESCE_MAX 0xFF #define QED_DEFAULT_RX_USECS 12 +#define QED_DEFAULT_TX_USECS 48 /* forward */ struct qed_dev; @@ -673,16 +674,6 @@ struct qed_common_ops { int (*nvm_get_image)(struct qed_dev *cdev, enum qed_nvm_images type, u8 *buf, u16 len); -/** - * @brief get_coalesce - Get coalesce parameters in usec - * - * @param cdev - * @param rx_coal - Rx coalesce value in usec - * @param tx_coal - Tx coalesce value in usec - * - */ - void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal); - /** * @brief set_coalesce - Configure Rx coalesce value in usec * -- cgit From 41822878b2ff40a1f57be81f1a1f0f040847b912 Mon Sep 17 00:00:00 2001 From: Rahul Verma Date: Wed, 26 Jul 2017 06:07:15 -0700 Subject: qed: enhanced per queue max coalesce value. Maximum coalesce per Rx/Tx queue is extended from 255 to 511. Signed-off-by: Rahul Verma Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 2b4720bb8b40..cc646ca97974 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -184,7 +184,7 @@ enum qed_led_mode { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) -#define QED_COALESCE_MAX 0xFF +#define QED_COALESCE_MAX 0x1FF #define QED_DEFAULT_RX_USECS 12 #define QED_DEFAULT_TX_USECS 48 -- cgit From ad84dad2160d5f36bb471b391462d651c887d693 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:05 +0300 Subject: RDMA/qedr: notify user application if DPM is supported Direct Packet Mode support may be disabled, e.g, due to limited resources. Notifying the user application prevents wasting cycles on attempting to send these kind of packets. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- include/uapi/rdma/qedr-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 75c270d839c8..2684004ec4fd 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -49,6 +49,7 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_recv_wr; __u32 sges_per_srq_wr; __u32 max_cqes; + __u8 dpm_enabled; }; struct qedr_alloc_pd_ureq { -- cgit From 67cbe3532c2cd84303a2073cedad6b8bcad13be3 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:06 +0300 Subject: RDMA/qedr: notify user application of supported WIDs The number of supported WIDs, if they are supported at all, can be limited due to resources. Notifying the user space application the number of available WIDs allows it to utilize them correctly. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- include/uapi/rdma/qedr-abi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 2684004ec4fd..54b64357ab24 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -50,6 +50,8 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_srq_wr; __u32 max_cqes; __u8 dpm_enabled; + __u8 wids_enabled; + __u16 wid_count; }; struct qedr_alloc_pd_ureq { -- cgit From fc2237a724a9e448599076d7d23497f51e2f7441 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 24 Jul 2017 09:46:18 -0700 Subject: HID: introduce hid_is_using_ll_driver Although HID itself is transport-agnostic, occasionally a driver may want to interact with the low-level transport that a device is connected through. To do this, we need to know what kind of bus is in use. The first guess may be to look at the 'bus' field of the 'struct hid_device', but this field may be emulated in some cases (e.g. uhid). More ideally, we can check which ll_driver a device is using. This function introduces a 'hid_is_using_ll_driver' function and makes the 'struct hid_ll_driver' of the four most common transports accessible through hid.h. Signed-off-by: Jason Gerecke Acked-By: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5006f9b5d837..3853408daf7f 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -777,6 +777,17 @@ struct hid_ll_driver { int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); }; +extern struct hid_ll_driver i2c_hid_ll_driver; +extern struct hid_ll_driver hidp_hid_driver; +extern struct hid_ll_driver uhid_hid_driver; +extern struct hid_ll_driver usb_hid_driver; + +static inline bool hid_is_using_ll_driver(struct hid_device *hdev, + struct hid_ll_driver *driver) +{ + return hdev->ll_driver == driver; +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 -- cgit From ee011c5b4c43b215a985c7d4368f77a572c5c216 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Jul 2017 14:34:52 +0200 Subject: soc: renesas: Add r8a77995 SYSC PM Domain Binding Definitions Add power domain indices for R-Car D3. Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- include/dt-bindings/power/r8a77995-sysc.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/dt-bindings/power/r8a77995-sysc.h (limited to 'include') diff --git a/include/dt-bindings/power/r8a77995-sysc.h b/include/dt-bindings/power/r8a77995-sysc.h new file mode 100644 index 000000000000..09d0ed575b73 --- /dev/null +++ b/include/dt-bindings/power/r8a77995-sysc.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 Glider bvba + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ +#ifndef __DT_BINDINGS_POWER_R8A77995_SYSC_H__ +#define __DT_BINDINGS_POWER_R8A77995_SYSC_H__ + +/* + * These power domain indices match the numbers of the interrupt bits + * representing the power areas in the various Interrupt Registers + * (e.g. SYSCISR, Interrupt Status Register) + */ + +#define R8A77995_PD_CA53_CPU0 5 +#define R8A77995_PD_CA53_SCU 21 + +/* Always-on power area */ +#define R8A77995_PD_ALWAYS_ON 32 + +#endif /* __DT_BINDINGS_POWER_R8A77995_SYSC_H__ */ -- cgit From 649ea4d5a624f061a111b1f1cb0e47cfdc3ac21b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Jul 2017 15:56:53 -0500 Subject: objtool: Assume unannotated UD2 instructions are dead ends Arnd reported some false positive warnings with GCC 7: drivers/hid/wacom_wac.o: warning: objtool: wacom_bpt3_touch()+0x2a5: stack state mismatch: cfa1=7+8 cfa2=6+16 drivers/iio/adc/vf610_adc.o: warning: objtool: vf610_adc_calculate_rates() falls through to next function vf610_adc_sample_set() drivers/pwm/pwm-hibvt.o: warning: objtool: hibvt_pwm_get_state() falls through to next function hibvt_pwm_remove() drivers/pwm/pwm-mediatek.o: warning: objtool: mtk_pwm_config() falls through to next function mtk_pwm_enable() drivers/spi/spi-bcm2835.o: warning: objtool: .text: unexpected end of section drivers/spi/spi-bcm2835aux.o: warning: objtool: .text: unexpected end of section drivers/watchdog/digicolor_wdt.o: warning: objtool: dc_wdt_get_timeleft() falls through to next function dc_wdt_restart() When GCC 7 detects a potential divide-by-zero condition, it sometimes inserts a UD2 instruction for the case where the divisor is zero, instead of letting the hardware trap on the divide instruction. Objtool doesn't consider UD2 to be fatal unless it's annotated with unreachable(). So it considers the GCC-generated UD2 to be non-fatal, and it tries to follow the control flow past the UD2 and gets confused. Previously, objtool *did* assume UD2 was always a dead end. That changed with the following commit: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") The motivation behind that change was that Peter was planning on using UD2 for __WARN(), which is *not* a dead end. However, it turns out that some emulators rely on UD2 being fatal, so he ended up using 'ud0' instead: 9a93848fe787 ("x86/debug: Implement __WARN() using UD0") For GCC 4.5+, it should be safe to go back to the previous assumption that UD2 is fatal, even when it's not annotated with unreachable(). But for pre-4.5 versions of GCC, the unreachable() macro isn't supported, so such cases of UD2 need to be explicitly annotated as reachable. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/e57fa9dfede25f79487da8126ee9cdf7b856db65.1501188854.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 16 ---------------- include/linux/compiler.h | 25 ++++++++++++++++++++++++- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 17cbe3cffc0e..dd4b4c59c2f0 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -128,22 +128,6 @@ #define __always_unused __attribute__((unused)) #define __mode(x) __attribute__((mode(x))) -#ifdef CONFIG_STACK_VALIDATION -#define annotate_unreachable() ({ \ - asm("%c0:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__LINE__)); \ -}) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ - ".popsection\n\t" -#else -#define annotate_unreachable() -#endif - /* gcc version specific checks */ #if GCC_VERSION < 30200 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 641f5912d75f..70f070ddd7c7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -185,11 +185,34 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* Unreachable code */ +#ifdef CONFIG_STACK_VALIDATION +#define annotate_reachable() ({ \ + asm("%c0:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__LINE__)); \ +}) +#define annotate_unreachable() ({ \ + asm("%c0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__LINE__)); \ +}) +#define ASM_UNREACHABLE \ + "999:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" +#else +#define annotate_reachable() +#define annotate_unreachable() +#endif + #ifndef ASM_UNREACHABLE # define ASM_UNREACHABLE #endif #ifndef unreachable -# define unreachable() do { } while (1) +# define unreachable() do { annotate_reachable(); do { } while (1); } while (0) #endif /* -- cgit From 333706b8ed81b64b6c4241e493791a81bc8e6d43 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Mon, 17 Jul 2017 15:16:21 -0500 Subject: crypto: Add akcipher_set_reqsize() function Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/crypto/internal/akcipher.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h index 479a0078f0f7..805686ba2be4 100644 --- a/include/crypto/internal/akcipher.h +++ b/include/crypto/internal/akcipher.h @@ -38,6 +38,12 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req) return req->__ctx; } +static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher, + unsigned int reqsize) +{ + crypto_akcipher_alg(akcipher)->reqsize = reqsize; +} + static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm) { return tfm->base.__crt_ctx; -- cgit From 80aafd50b6a4fa6b6bba4b451b553d5d221f59ff Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Jul 2017 06:22:16 -0400 Subject: Documentation: add some docs for errseq_t ...and fix up a few comments in the code. Signed-off-by: Jeff Layton --- include/linux/errseq.h | 5 +++-- include/linux/fs.h | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/errseq.h b/include/linux/errseq.h index 784f0860527b..f746bd8fe4d0 100644 --- a/include/linux/errseq.h +++ b/include/linux/errseq.h @@ -1,8 +1,9 @@ +/* + * See Documentation/errseq.rst and lib/errseq.c + */ #ifndef _LINUX_ERRSEQ_H #define _LINUX_ERRSEQ_H -/* See lib/errseq.c for more info */ - typedef u32 errseq_t; errseq_t errseq_set(errseq_t *eseq, int err); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7b5d6816542b..21e7df1ad613 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2571,8 +2571,6 @@ extern int __must_check file_write_and_wait_range(struct file *file, * When a writeback error occurs, most filesystems will want to call * filemap_set_wb_err to record the error in the mapping so that it will be * automatically reported whenever fsync is called on the file. - * - * FIXME: mention FS_* flag here? */ static inline void filemap_set_wb_err(struct address_space *mapping, int err) { -- cgit From 7d35079f8277b653d6a3075eea9edd4dbf7c2b29 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:46 -0700 Subject: kernfs: use idr instead of ida to manage inode number kernfs uses ida to manage inode number. The problem is we can't get kernfs_node from inode number with ida. Switching to use idr, next patch will add an API to get kernfs_node from inode number. Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index a9b11b8d06f2..5f5d602eb433 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -163,7 +163,7 @@ struct kernfs_root { unsigned int flags; /* KERNFS_ROOT_* flags */ /* private fields, do not use outside kernfs proper */ - struct ida ino_ida; + struct idr ino_idr; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_mutex */ -- cgit From 4a3ef68acacf31570066e69593de5cc49cc91638 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:47 -0700 Subject: kernfs: implement i_generation Set i_generation for kernfs inode. This is required to implement exportfs operations. The generation is 32-bit, so it's possible the generation wraps up and we find stale files. To reduce the posssibility, we don't reuse inode numer immediately. When the inode number allocation wraps, we increase generation number. In this way generation/inode number consist of a 64-bit number which is unlikely duplicated. This does make the idr tree more sparse and waste some memory. Since idr manages 32-bit keys, idr uses a 6-level radix tree, each level covers 6 bits of the key. In a 100k inode kernfs, the worst case will have around 300k radix tree node. Each node is 576bytes, so the tree will use about ~150M memory. Sounds not too bad, if this really is a problem, we should find better data structure. Acked-by: Tejun Heo Acked-by: Greg Kroah-Hartman Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/kernfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5f5d602eb433..8c00d28f468a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -135,6 +135,7 @@ struct kernfs_node { umode_t mode; unsigned int ino; struct kernfs_iattrs *iattr; + u32 generation; }; /* @@ -164,6 +165,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct idr ino_idr; + u32 next_generation; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_mutex */ -- cgit From c53cd490b1a491ebf1d8e30da97e7231459a4208 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:50 -0700 Subject: kernfs: introduce kernfs_node_id inode number and generation can identify a kernfs node. We are going to export the identification by exportfs operations, so put ino and generation into a separate structure. It's convenient when later patches use the identification. Acked-by: Greg Kroah-Hartman Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 2 +- include/linux/kernfs.h | 12 ++++++++++-- include/trace/events/writeback.h | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 710a005c6b7a..30c68773fd1e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -543,7 +543,7 @@ static inline bool cgroup_is_populated(struct cgroup *cgrp) /* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { - return cgrp->kn->ino; + return cgrp->kn->id.ino; } /* cft/css accessors for cftype->write() operation */ diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 8c00d28f468a..06a0c5913e1d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -95,6 +95,15 @@ struct kernfs_elem_attr { struct kernfs_node *notify_next; /* for kernfs_notify() */ }; +/* represent a kernfs node */ +union kernfs_node_id { + struct { + u32 ino; + u32 generation; + }; + u64 id; +}; + /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are @@ -131,11 +140,10 @@ struct kernfs_node { void *priv; + union kernfs_node_id id; unsigned short flags; umode_t mode; - unsigned int ino; struct kernfs_iattrs *iattr; - u32 generation; }; /* diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 7bd8783a590f..9b57f014d79d 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -136,7 +136,7 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb) { - return wb->memcg_css->cgroup->kn->ino; + return wb->memcg_css->cgroup->kn->id.ino; } static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc) -- cgit From aa8188253474b4053bc2900d9fcb545ce68bdf5c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:51 -0700 Subject: kernfs: add exportfs operations Now we have the facilities to implement exportfs operations. The idea is cgroup can export the fhandle info to userspace, then userspace uses fhandle to find the cgroup name. Another example is userspace can get fhandle for a cgroup and BPF uses the fhandle to filter info for the cgroup. Acked-by: Greg Kroah-Hartman Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/kernfs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 06a0c5913e1d..d149361e5875 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -69,6 +69,12 @@ enum kernfs_root_flag { * following flag enables that behavior. */ KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK = 0x0002, + + /* + * The filesystem supports exportfs operation, so userspace can use + * fhandle to access nodes of the fs. + */ + KERNFS_ROOT_SUPPORT_EXPORTOP = 0x0004, }; /* type-specific structures for kernfs_node union members */ @@ -98,6 +104,12 @@ struct kernfs_elem_attr { /* represent a kernfs node */ union kernfs_node_id { struct { + /* + * blktrace will export this struct as a simplified 'struct + * fid' (which is a big data struction), so userspace can use + * it to find kernfs node. The layout must match the first two + * fields of 'struct fid' exactly. + */ u32 ino; u32 generation; }; -- cgit From 121508df44d074245a72eda6b067478218480a40 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:52 -0700 Subject: cgroup: export fhandle info for a cgroup Add an API to export cgroup fhandle info. We don't export a full 'struct file_handle', there are unrequired info. Sepcifically, cgroup is always a directory, so we don't need a 'FILEID_INO32_GEN_PARENT' type fhandle, we only need export the inode number and generation number just like what generic_fh_to_dentry does. And we can avoid the overhead of getting an inode too, since kernfs_node_id (ino and generation) has all the info required. Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 30c68773fd1e..52ef9a68ff14 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -609,6 +609,10 @@ static inline void cgroup_kthread_ready(void) current->no_cgroup_migration = 0; } +static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp) +{ + return &cgrp->kn->id; +} #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -631,6 +635,10 @@ static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} +static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp) +{ + return NULL; +} static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) -- cgit From ca1136c99b66b1566781ff12ecddc635d570f932 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:53 -0700 Subject: blktrace: export cgroup info in trace Currently blktrace isn't cgroup aware. blktrace prints out task name of current context, but the task of current context isn't always in the cgroup where the BIO comes from. We can't use task name to find out IO cgroup. For example, Writeback BIOs always comes from flusher thread but the BIOs are for different blk cgroups. Request could be requeued and dispatched from completely different tasks. MD/DM are another examples. This patch tries to fix the gap. We print out cgroup fhandle info in blktrace. Userspace can use open_by_handle_at() syscall to find the cgroup by fhandle. Or userspace can use name_to_handle_at() syscall to find fhandle for a cgroup and use a BPF program to filter out blktrace for a specific cgroup. We add a new 'blk_cgroup' trace option for blk tracer. It's default off. Application which doesn't know the new option isn't affected. When it's on, we output fhandle info right after blk_io_trace with an extra bit set in event action. So from application point of view, blktrace with the option will output new actions. I didn't change blk trace event yet, since I'm not sure if changing the trace event output is an ABI issue. If not, I'll do it later. Acked-by: Steven Rostedt (VMware) Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/uapi/linux/blktrace_api.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h index c590ca6bfbd9..9cdaedeadb84 100644 --- a/include/uapi/linux/blktrace_api.h +++ b/include/uapi/linux/blktrace_api.h @@ -52,6 +52,7 @@ enum blktrace_act { __BLK_TA_REMAP, /* bio was remapped */ __BLK_TA_ABORT, /* request aborted */ __BLK_TA_DRV_DATA, /* driver-specific binary data */ + __BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/ }; /* @@ -61,6 +62,7 @@ enum blktrace_notify { __BLK_TN_PROCESS = 0, /* establish pid/name mapping */ __BLK_TN_TIMESTAMP, /* include system clock */ __BLK_TN_MESSAGE, /* Character string message */ + __BLK_TN_CGROUP = __BLK_TA_CGROUP, /* from a cgroup */ }; @@ -107,6 +109,7 @@ struct blk_io_trace { __u32 cpu; /* on what cpu did it happen */ __u16 error; /* completion error */ __u16 pdu_len; /* length of data after this trace */ + /* cgroup id will be stored here if exists */ }; /* -- cgit From 007cc56b7eeca8848021bc43aca2b8607fbe5589 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:54 -0700 Subject: block: always attach cgroup info into bio blkcg_bio_issue_check() already gets blkcg for a BIO. bio_associate_blkcg() uses a percpu refcounter, so it's a very cheap operation. There is no point we don't attach the cgroup info into bio at blkcg_bio_issue_check. This also makes blktrace outputs correct cgroup info. Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 7104bea8dab1..9d92153dd856 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -691,6 +691,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, rcu_read_lock(); blkcg = bio_blkcg(bio); + /* associate blkcg if bio hasn't attached one */ + bio_associate_blkcg(bio, &blkcg->css); + blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(q->queue_lock); -- cgit From 69fd5c391763bd94a40dd152bc72a7f230137150 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:55 -0700 Subject: blktrace: add an option to allow displaying cgroup path By default we output cgroup id in blktrace. This adds an option to display cgroup path. Since get cgroup path is a relativly heavy operation, we don't enable it by default. with the option enabled, blktrace will output something like this: dd-1353 [007] d..2 293.015252: 8,0 /test/level D R 24 + 8 [dd] Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 6 ++++++ include/linux/kernfs.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 52ef9a68ff14..6144fe923b73 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -613,6 +613,9 @@ static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp) { return &cgrp->kn->id; } + +void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, + char *buf, size_t buflen); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -645,6 +648,9 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, { return true; } + +static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, + char *buf, size_t buflen) {} #endif /* !CONFIG_CGROUPS */ /* diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index d149361e5875..ab25c8b6d9e3 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -358,6 +358,8 @@ struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns); void kernfs_init(void); +struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root, + const union kernfs_node_id *id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) -- cgit From 35fe6d763229e8fc0eb5f9b93a401673cfcb5e1e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 12 Jul 2017 11:49:56 -0700 Subject: block: use standard blktrace API to output cgroup info for debug notes Currently cfq/bfq/blk-throttle output cgroup info in trace in their own way. Now we have standard blktrace API for this, so convert them to use it. Note, this changes the behavior a little bit. cgroup info isn't output by default, we only do this with 'blk_cgroup' option enabled. cgroup info isn't output as a string by default too, we only do this with 'blk_cgname' option enabled. Also cgroup info is output in different position of the note string. I think these behavior changes aren't a big issue (actually we make trace data shorter which is good), since the blktrace note is solely for debugging. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d2e908586e3d..67b4d4dfc19c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -28,10 +28,12 @@ struct blk_trace { atomic_t dropped; }; +struct blkcg; + extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern __printf(2, 3) -void __trace_note_message(struct blk_trace *, const char *fmt, ...); +extern __printf(3, 4) +void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *fmt, ...); /** * blk_add_trace_msg - Add a (simple) message to the blktrace stream @@ -46,12 +48,14 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...); * NOTE: Can not use 'static inline' due to presence of var args... * **/ -#define blk_add_trace_msg(q, fmt, ...) \ +#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ struct blk_trace *bt = (q)->blk_trace; \ if (unlikely(bt)) \ - __trace_note_message(bt, fmt, ##__VA_ARGS__); \ + __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ } while (0) +#define blk_add_trace_msg(q, fmt, ...) \ + blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) #define BLK_TN_MAX_MSG 128 static inline bool blk_trace_note_message_enabled(struct request_queue *q) @@ -82,6 +86,7 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) # define blk_trace_remove_sysfs(dev) do { } while (0) # define blk_trace_note_message_enabled(q) (false) static inline int blk_trace_init_sysfs(struct device *dev) -- cgit From 0bf8bf50eddc7511b52461bae798cbfaa0157a34 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 24 Jul 2017 18:27:25 -0700 Subject: module: Remove const attribute from alias for MODULE_DEVICE_TABLE MODULE_DEVICE_TABLE(type, name) creates an alias of type 'extern const typeof(name)'. If 'name' is already constant the 'const' attribute is specified twice, which is not allowed in C89 (see discussion at https://lkml.org/lkml/2017/5/23/1440). Since the kernel is built with -std=gnu89 clang generates warnings like this: drivers/thermal/x86_pkg_temp_thermal.c:509:1: warning: duplicate 'const' declaration specifier [-Wduplicate-decl-specifier] MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); ^ ./include/linux/module.h:212:8: note: expanded from macro 'MODULE_DEVICE_TABLE' extern const typeof(name) __mod_##type##__##name##_device_table Remove the const attribute from the alias to avoid the duplicate specifier. After all it is only an alias and the attribute shouldn't have any effect. Signed-off-by: Matthias Kaehlcke Signed-off-by: Jessica Yu --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index e7bdd549e527..fe5aa3736707 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -209,7 +209,7 @@ extern void cleanup_module(void); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -extern const typeof(name) __mod_##type##__##name##_device_table \ +extern typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit From 1a5f3da20bd966220931239fbd31e6ac6ff42251 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Ravipati Date: Thu, 27 Jul 2017 16:47:26 -0700 Subject: net: ethtool: add support for forward error correction modes Forward Error Correction (FEC) modes i.e Base-R and Reed-Solomon modes are introduced in 25G/40G/100G standards for providing good BER at high speeds. Various networking devices which support 25G/40G/100G provides ability to manage supported FEC modes and the lack of FEC encoding control and reporting today is a source for interoperability issues for many vendors. FEC capability as well as specific FEC mode i.e. Base-R or RS modes can be requested or advertised through bits D44:47 of base link codeword. This patch set intends to provide option under ethtool to manage and report FEC encoding settings for networking devices as per IEEE 802.3 bj, bm and by specs. set-fec/show-fec option(s) are designed to provide control and report the FEC encoding on the link. SET FEC option: root@tor: ethtool --set-fec swp1 encoding [off | RS | BaseR | auto] Encoding: Types of encoding Off : Turning off any encoding RS : enforcing RS-FEC encoding on supported speeds BaseR : enforcing Base R encoding on supported speeds Auto : IEEE defaults for the speed/medium combination Here are a few examples of what we would expect if encoding=auto: - if autoneg is on, we are expecting FEC to be negotiated as on or off as long as protocol supports it - if the hardware is capable of detecting the FEC encoding on it's receiver it will reconfigure its encoder to match - in absence of the above, the configuration would be set to IEEE defaults. >From our understanding , this is essentially what most hardware/driver combinations are doing today in the absence of a way for users to control the behavior. SHOW FEC option: root@tor: ethtool --show-fec swp1 FEC parameters for swp1: Active FEC encodings: RS Configured FEC encodings: RS | BaseR ETHTOOL DEVNAME output modification: ethtool devname output: root@tor:~# ethtool swp1 Settings for swp1: root@hpe-7712-03:~# ethtool swp18 Settings for swp18: Supported ports: [ FIBRE ] Supported link modes: 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 100000baseSR4/Full 100000baseCR4/Full 100000baseLR4_ER4/Full Supported pause frame use: No Supports auto-negotiation: Yes Supported FEC modes: [RS | BaseR | None | Not reported] Advertised link modes: Not reported Advertised pause frame use: No Advertised auto-negotiation: No Advertised FEC modes: [RS | BaseR | None | Not reported] <<<< One or more FEC modes Speed: 100000Mb/s Duplex: Full Port: FIBRE PHYAD: 106 Transceiver: internal Auto-negotiation: off Link detected: yes This patch includes following changes a) New ETHTOOL_SFECPARAM/SFECPARAM API, handled by the new get_fecparam/set_fecparam callbacks, provides support for configuration of forward error correction modes. b) Link mode bits for FEC modes i.e. None (No FEC mode), RS, BaseR/FC are defined so that users can configure these fec modes for supported and advertising fields as part of link autonegotiation. Signed-off-by: Vidya Sagar Ravipati Signed-off-by: Dustin Byford Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 ++++ include/uapi/linux/ethtool.h | 48 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 83cc9863444b..afdbb701fdb4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -374,5 +374,9 @@ struct ethtool_ops { struct ethtool_link_ksettings *); int (*set_link_ksettings)(struct net_device *, const struct ethtool_link_ksettings *); + int (*get_fecparam)(struct net_device *, + struct ethtool_fecparam *); + int (*set_fecparam)(struct net_device *, + struct ethtool_fecparam *); }; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7d4a594d5d58..9c041dae8e2c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op { char data[]; }; +/** + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM + * @active_fec: FEC mode which is active on porte + * @fec: Bitmask of supported/configured FEC modes + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. + * + * Drivers should reject a non-zero setting of @autoneg when + * autoneogotiation is disabled (or not supported) for the link. + * + */ +struct ethtool_fecparam { + __u32 cmd; + /* bitmask of FEC modes */ + __u32 active_fec; + __u32 fec; + __u32 reserved; +}; + +/** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver + * @ETHTOOL_FEC_OFF: No FEC Mode + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode + */ +enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, + ETHTOOL_FEC_AUTO_BIT, + ETHTOOL_FEC_OFF_BIT, + ETHTOOL_FEC_RS_BIT, + ETHTOOL_FEC_BASER_BIT, +}; + +#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) +#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) +#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) +#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) +#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. * Please use ETHTOOL_GLINKSETTINGS @@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op { #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ #define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ #define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ +#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ +#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET @@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + = ETHTOOL_LINK_MODE_FEC_BASER_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ -- cgit From c7ac15ce89242e3c25fcc49fa4d00028285fab05 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 25 Jul 2017 20:39:58 +0300 Subject: serial: core: move UPF_NO_TXEN_TEST to quirks and rename First 16 bits in the flags field are user-visible except UPF_NO_TXEN_TEST. To keep it clean we introduce internal quirks and move UPF_NO_TXEN_TEST to them. Rename the constant to UPQ_NO_TXEN_TEST to distinguish with port flags. Users are converted accordingly. The quirks field might be extended later to hold the additional ones. Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1775500294bb..8dc24c855a70 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -20,7 +20,7 @@ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H - +#include #include #include #include @@ -144,7 +144,7 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ - unsigned char unused1; + unsigned char quirks; /* internal quirks */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ @@ -155,6 +155,9 @@ struct uart_port { #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ + /* quirks must be updated while holding port mutex */ +#define UPQ_NO_TXEN_TEST BIT(0) + unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ @@ -175,7 +178,6 @@ struct uart_port { * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..UPF_LAST_USER] are userspace defined/visible/changeable - * except bit 15 (UPF_NO_TXEN_TEST) which is masked off. * The remaining bits are serial-core specific and not modifiable by * userspace. */ @@ -192,7 +194,6 @@ struct uart_port { #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) -#define UPF_NO_TXEN_TEST ((__force upf_t) (1 << 15)) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) (1 << 19)) -- cgit From 979990c6284814617d8f2179d197f72ff62b5d85 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Jun 2017 23:10:41 +0200 Subject: tty: improve tty_insert_flip_char() fast path kernelci.org reports a crazy stack usage for the VT code when CONFIG_KASAN is enabled: drivers/tty/vt/keyboard.c: In function 'kbd_keycode': drivers/tty/vt/keyboard.c:1452:1: error: the frame size of 2240 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] The problem is that tty_insert_flip_char() gets inlined many times into kbd_keycode(), and also into other functions, and each copy requires 128 bytes for stack redzone to check for a possible out-of-bounds access on the 'ch' and 'flags' arguments that are passed into tty_insert_flip_string_flags as a variable-length string. This introduces a new __tty_insert_flip_char() function for the slow path, which receives the two arguments by value. This completely avoids the problem and the stack usage goes back down to around 100 bytes. Without KASAN, this is also slightly better, as we don't have to spill the arguments to the stack but can simply pass 'ch' and 'flag' in registers, saving a few bytes in .text for each call site. This should be backported to linux-4.0 or later, which first introduced the stack sanitizer in the kernel. Cc: stable@vger.kernel.org Fixes: c420f167db8c ("kasan: enable stack instrumentation") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_flip.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index c28dd523f96e..d43837f2ce3a 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -12,6 +12,7 @@ extern int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars, size_t size); extern void tty_flip_buffer_push(struct tty_port *port); void tty_schedule_flip(struct tty_port *port); +int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag); static inline int tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag) @@ -26,7 +27,7 @@ static inline int tty_insert_flip_char(struct tty_port *port, *char_buf_ptr(tb, tb->used++) = ch; return 1; } - return tty_insert_flip_string_flags(port, &ch, &flag, 1); + return __tty_insert_flip_char(port, ch, flag); } static inline int tty_insert_flip_string(struct tty_port *port, -- cgit From 87840fb5a9dc934948ef7e067a993423a7a39df9 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Wed, 19 Jul 2017 14:42:25 +0300 Subject: staging: fsl-mc: add missing fsl_mc comment in struct msi_desc The mc-bus specific field, fsl_mc in struct msi_desc is missing its comment so add it. Signed-off-by: Laurentiu Tudor Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/msi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index df6d59201d31..80e3b562bef6 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -66,6 +66,7 @@ struct fsl_mc_msi_desc { * @mask_pos: [PCI MSI] Mask register position * @mask_base: [PCI MSI-X] Mask register base address * @platform: [platform] Platform device specific msi descriptor data + * @fsl_mc: [fsl-mc] FSL MC device specific msi descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ -- cgit From 64c83d837329531252a1a0f0dfdd4fd607e1d8e9 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:49 -0400 Subject: net netlink: Add new type NLA_BITFIELD32 Generic bitflags attribute content sent to the kernel by user. With this netlink attr type the user can either set or unset a flag in the kernel. The value is a bitmap that defines the bit values being set The selector is a bitmask that defines which value bit is to be considered. A check is made to ensure the rules that a kernel subsystem always conforms to bitflags the kernel already knows about. i.e if the user tries to set a bit flag that is not understood then the _it will be rejected_. In the most basic form, the user specifies the attribute policy as: [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, where myvalidflags is the bit mask of the flags the kernel understands. If the user _does not_ provide myvalidflags then the attribute will also be rejected. Examples: value = 0x0, and selector = 0x1 implies we are selecting bit 1 and we want to set its value to 0. value = 0x2, and selector = 0x2 implies we are selecting bit 2 and we want to set its value to 1. Suggested-by: Jiri Pirko Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/netlink.h | 16 ++++++++++++++++ include/uapi/linux/netlink.h | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..82dd298b40c7 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -178,6 +178,7 @@ enum { NLA_S16, NLA_S32, NLA_S64, + NLA_BITFIELD32, __NLA_TYPE_MAX, }; @@ -206,6 +207,7 @@ enum { * NLA_MSECS Leaving the length field zero will verify the * given type fits, using it verifies minimum length * just like "All other" + * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute * All other Minimum length of attribute payload * * Example: @@ -213,11 +215,13 @@ enum { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags }, * }; */ struct nla_policy { u16 type; u16 len; + void *validation_data; }; /** @@ -1202,6 +1206,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) return tmp; } +/** + * nla_get_bitfield32 - return payload of 32 bitfield attribute + * @nla: nla_bitfield32 attribute + */ +static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) +{ + struct nla_bitfield32 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + return tmp; +} + /** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f86127a46cfc..f4fc9c9e123d 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -226,5 +226,22 @@ struct nlattr { #define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) +/* Generic 32 bitflags attribute content sent to the kernel. + * + * The value is a bitmap that defines the values being set + * The selector is a bitmask that defines which value is legit + * + * Examples: + * value = 0x0, and selector = 0x1 + * implies we are selecting bit 1 and we want to set its value to 0. + * + * value = 0x2, and selector = 0x2 + * implies we are selecting bit 2 and we want to set its value to 1. + * + */ +struct nla_bitfield32 { + __u32 value; + __u32 selector; +}; #endif /* _UAPI__LINUX_NETLINK_H */ -- cgit From 90825b23a887f06f6c05bdde77b200c5fe9b6217 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:51 -0400 Subject: net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch When you dump hundreds of thousands of actions, getting only 32 per dump batch even when the socket buffer and memory allocations allow is inefficient. With this change, the user will get as many as possibly fitting within the given constraints available to the kernel. The top level action TLV space is extended. An attribute TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON is set by the user indicating the user is capable of processing these large dumps. Older user space which doesnt set this flag doesnt get the large (than 32) batches. The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many actions are put in a single batch. As such user space app knows how long to iterate (independent of the type of action being dumped) instead of hardcoded maximum of 32 thus maintaining backward compat. Some results dumping 1.5M actions below: first an unpatched tc which doesnt understand these features... prompt$ time -p tc actions ls action gact | grep index | wc -l 1500000 real 1388.43 user 2.07 sys 1386.79 Now lets see a patched tc which sets the correct flags when requesting a dump: prompt$ time -p updatedtc actions ls action gact | grep index | wc -l 1500000 real 178.13 user 2.02 sys 176.96 That is about 8x performance improvement for tc app which sets its receive buffer to about 32K. Signed-off-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d148505010a7..bfa80a6164d9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -683,10 +683,28 @@ struct tcamsg { unsigned char tca__pad1; unsigned short tca__pad2; }; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + #define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) -#define TCA_ACT_TAB 1 /* attr type must be >=1 */ -#define TCAA_MAX 1 +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO + * actions in a dump. All dump responses will contain the number of actions + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) -- cgit From e62e484df04964ac947c679ef4f00c54ae5395aa Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 30 Jul 2017 13:24:52 -0400 Subject: net sched actions: add time filter for action dumping This patch adds support for filtering based on time since last used. When we are dumping a large number of actions it is useful to have the option of filtering based on when the action was last used to reduce the amount of data crossing to user space. With this patch the user space app sets the TCA_ROOT_TIME_DELTA attribute with the value in milliseconds with "time of interest since now". The kernel converts this to jiffies and does the filtering comparison matching entries that have seen activity since then and returns them to user space. Old kernels and old tc continue to work in legacy mode since they dont specify this attribute. Some example (we have 400 actions bound to 400 filters); at installation time. Using updated when tc setting the time of interest to 120 seconds earlier (we see 400 actions): prompt$ hackedtc actions ls action gact since 120000| grep index | wc -l 400 go get some coffee and wait for > 120 seconds and try again: prompt$ hackedtc actions ls action gact since 120000 | grep index | wc -l 0 Lets see a filter bound to one of these actions: .... filter pref 10 u32 filter pref 10 u32 fh 800: ht divisor 1 filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 2 success 1) match 7f000002/ffffffff at 12 (success 1 ) action order 1: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1145 sec used 802 sec Action statistics: Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 .... that coffee took long, no? It was good. Now lets ping -c 1 127.0.0.2, then run the actions again: prompt$ hackedtc actions ls action gact since 120 | grep index | wc -l 1 More details please: prompt$ hackedtc -s actions ls action gact since 120000 action order 0: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1270 sec used 30 sec Action statistics: Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 And the filter? filter pref 10 u32 filter pref 10 u32 fh 800: ht divisor 1 filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 4 success 2) match 7f000002/ffffffff at 12 (success 2 ) action order 1: gact action pass random type none pass val 0 index 23 ref 2 bind 1 installed 1324 sec used 84 sec Action statistics: Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Signed-off-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index bfa80a6164d9..dab7dad9e01a 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -691,6 +691,7 @@ enum { #define TCAA_MAX TCA_ROOT_TAB TCA_ROOT_FLAGS, TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ __TCA_ROOT_MAX, #define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) }; -- cgit From 0f9b973b65910eeb0f73bb1a2178bc0826a1b791 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 28 Jul 2017 23:13:11 +0200 Subject: dt-bindings: clock: meson8b: describe the embedded reset controller The Amlogic Meson8/Meson8b/Meson8m2 clock controller provides some reset lines. These are used for example to boot the secondary CPU cores. This patch describes the reset controller which is embedded into the clock controller on these SoCs. A header file is provided which provides preprocessor macros for each reset line (to make the .dts files easier to read). Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong --- .../dt-bindings/reset/amlogic,meson8b-clkc-reset.h | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/reset/amlogic,meson8b-clkc-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/amlogic,meson8b-clkc-reset.h b/include/dt-bindings/reset/amlogic,meson8b-clkc-reset.h new file mode 100644 index 000000000000..1f1b56e57346 --- /dev/null +++ b/include/dt-bindings/reset/amlogic,meson8b-clkc-reset.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2017 Martin Blumenstingl . + * + * SPDX-License-Identifier: (GPL-2.0+ OR MIT) + */ + +#ifndef _DT_BINDINGS_AMLOGIC_MESON8B_CLKC_RESET_H +#define _DT_BINDINGS_AMLOGIC_MESON8B_CLKC_RESET_H + +#define CLKC_RESET_L2_CACHE_SOFT_RESET 0 +#define CLKC_RESET_AXI_64_TO_128_BRIDGE_A5_SOFT_RESET 1 +#define CLKC_RESET_SCU_SOFT_RESET 2 +#define CLKC_RESET_CPU0_SOFT_RESET 3 +#define CLKC_RESET_CPU1_SOFT_RESET 4 +#define CLKC_RESET_CPU2_SOFT_RESET 5 +#define CLKC_RESET_CPU3_SOFT_RESET 6 +#define CLKC_RESET_A5_GLOBAL_RESET 7 +#define CLKC_RESET_A5_AXI_SOFT_RESET 8 +#define CLKC_RESET_A5_ABP_SOFT_RESET 9 +#define CLKC_RESET_AXI_64_TO_128_BRIDGE_MMC_SOFT_RESET 10 +#define CLKC_RESET_VID_CLK_CNTL_SOFT_RESET 11 +#define CLKC_RESET_VID_DIVIDER_CNTL_SOFT_RESET_POST 12 +#define CLKC_RESET_VID_DIVIDER_CNTL_SOFT_RESET_PRE 13 +#define CLKC_RESET_VID_DIVIDER_CNTL_RESET_N_POST 14 +#define CLKC_RESET_VID_DIVIDER_CNTL_RESET_N_PRE 15 + +#endif /* _DT_BINDINGS_AMLOGIC_MESON8B_CLKC_RESET_H */ -- cgit From f1e3f409d61372b742697d0b5e5cd17d09de37db Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 29 Jul 2017 11:40:55 -0300 Subject: ASoC: soc-pcm: Remove unused 'debugfs_dpcm_state' entry 'debugfs_dpcm_state' member from structure snd_soc_pcm_runtime is never used at all, so it is safe to remove it. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9c94b97c17f8..230b8eedd9f2 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1240,7 +1240,6 @@ struct snd_soc_pcm_runtime { struct delayed_work delayed_work; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_dpcm_root; - struct dentry *debugfs_dpcm_state; #endif unsigned int num; /* 0-based and monotonic increasing */ -- cgit From ac7b848390036dadd4351899d2a23748075916bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:31 +0200 Subject: netfilter: expect: add and use nf_ct_expect_iterate helpers We have several spots that open-code a expect walk, add a helper that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 2ba54feaccd8..818def011110 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data); +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, u32 portid, int report); + /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); -- cgit From 84657984c26fd0b64743a397f3a1a587fa4b575a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:32 +0200 Subject: netfilter: add and use nf_ct_unconfirmed_destroy This also removes __nf_ct_unconfirmed_destroy() call from nf_ct_iterate_cleanup_net, so that function can be used only when missing conntracks from unconfirmed list isn't a problem. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 48407569585d..6e6f678aaac7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); +/* Set all unconfirmed conntrack as dying */ +void nf_ct_unconfirmed_destroy(struct net *); + /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), -- cgit From 2cf0c8b3e6942ecafe6ebb1a6d0328a81641bf39 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:40 +0200 Subject: netlink: Introduce nla_strdup() This is similar to strdup() for netlink string attributes. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a6..c8c2eb5ae55e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); -- cgit From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:41 +0200 Subject: netfilter: nf_tables: Allow table names of up to 255 chars Allocate all table names dynamically to allow for arbitrary lengths but introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was chosen to allow using a domain name as per RFC 1035. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bd5be0d691d5..05ecf78ec078 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -957,7 +957,7 @@ struct nft_table { u32 use; u16 flags:14, genmask:2; - char name[NFT_TABLE_MAXNAMELEN]; + char *name; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6f0a950e21c3..0b94e572ef16 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1,7 +1,8 @@ #ifndef _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H -#define NFT_TABLE_MAXNAMELEN 32 +#define NFT_NAME_MAXLEN 256 +#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 -- cgit From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:42 +0200 Subject: netfilter: nf_tables: Allow chain name of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05ecf78ec078..be1610162ee0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -859,7 +859,7 @@ struct nft_chain { u16 level; u8 flags:6, genmask:2; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; }; enum nft_chain_type { @@ -1272,7 +1272,7 @@ struct nft_trans_set { struct nft_trans_chain { bool update; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; struct nft_stats __percpu *stats; u8 policy; }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0b94e572ef16..d9c03a8608ee 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -3,7 +3,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 -- cgit From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:43 +0200 Subject: netfilter: nf_tables: Allow set names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index be1610162ee0..66ba62fa7d90 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; - char name[NFT_SET_MAXNAMELEN]; + char *name; u32 ktype; u32 dtype; u32 objtype; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d9c03a8608ee..b5e73e80b7b6 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,7 +4,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_SET_MAXNAMELEN 32 +#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 -- cgit From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:44 +0200 Subject: netfilter: nf_tables: Allow object names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66ba62fa7d90..f9795fe394f3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, */ struct nft_object { struct list_head list; - char name[NFT_OBJ_MAXNAMELEN]; + char *name; struct nft_table *table; u32 genmask:2, use:30; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b5e73e80b7b6..be25cf69295b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -5,7 +5,7 @@ #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_OBJ_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_USERDATA_MAXLEN 256 /** -- cgit From 4d3a57f23dec59f0a2362e63540b2d01b37afe0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jul 2017 11:22:04 +0200 Subject: netfilter: conntrack: do not enable connection tracking unless needed Discussion during NFWS 2017 in Faro has shown that the current conntrack behaviour is unreasonable. Even if conntrack module is loaded on behalf of a single net namespace, its turned on for all namespaces, which is expensive. Commit 481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl") attempted to provide an alternative to the 'default on' behaviour by adding a sysctl to change it. However, as Eric points out, the sysctl only becomes available once the module is loaded, and then its too late. So we either have to move the sysctl to the core, or, alternatively, change conntrack to become active only once the rule set requires this. This does the latter, conntrack is only enabled when a rule needs it. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6d14b36e3a49..1b8de164d744 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -73,21 +73,6 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; -#ifdef CONFIG_SYSCTL -/* Protocol pernet registration. */ -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto); -#else -static inline int nf_ct_l3proto_pernet_register(struct net *n, - struct nf_conntrack_l3proto *p) -{ - return 0; -} -#endif - -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto); - /* Protocol global registration. */ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); -- cgit From bf90aadd630c2c9f7f965ba1e90d41b5b46db7c9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:12 -0700 Subject: IB/hfi1: Send MAD traps until repressed A trap should be sent to the FM until the FM sends a repress message. This is in line with the IBTA 13.4.9. Add the ability to resend traps until a repress message is received. Reviewed-by: Dennis Dalessandro Reviewed-by: Michael N. Henry Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 22fb15ff5e8b..fdfac0fd2f82 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -57,11 +57,21 @@ #include #include #include +#include #include #include #define RVT_MAX_PKEY_VALUES 16 +#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ +#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ + +struct trap_list { + u32 list_len; + struct list_head list; +}; + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -128,6 +138,13 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; + + /* + * Keep a list of traps that have not been repressed. They will be + * resent based on trap_timer. + */ + struct trap_list trap_lists[RVT_MAX_TRAP_LISTS]; + struct timer_list trap_timer; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit From c5dc3c69f17a7e77359f10c342d1816390bc8846 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Jun 2017 20:04:58 +0200 Subject: PCI/portdrv: Move error handler methods to struct pcie_port_service_driver Move the error handler methods to struct pcie_port_service_driver and avoid the detour through the mostly unused pci_error_handlers structure. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- include/linux/pcieport_if.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index afcd130ab3a9..18edc651c070 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -50,8 +50,8 @@ struct pcie_port_service_driver { int (*suspend) (struct pcie_device *dev); int (*resume) (struct pcie_device *dev); - /* Service Error Recovery Handler */ - const struct pci_error_handlers *err_handler; + /* Device driver may resume normal operations */ + void (*error_resume)(struct pci_dev *dev); /* Link Reset Capability - AER service driver specific */ pci_ers_result_t (*reset_link) (struct pci_dev *dev); -- cgit From 62ce94a7a5a54aac80975f5e6731707225d4077e Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 12 Jul 2017 00:04:14 -0400 Subject: PCI: Mark Broadcom HT2100 Root Port Extended Tags as broken Per PCIe r3.1, sec 2.2.6.2 and 7.8.4, a Requester may not use 8-bit Tags unless its Extended Tag Field Enable is set, but all Receivers/Completers must handle 8-bit Tags correctly regardless of their Extended Tag Field Enable. Some devices do not handle 8-bit Tags as Completers, so add a quirk for them. If we find such a device, we disable Extended Tags for the entire hierarchy to make peer-to-peer DMA possible. The Broadcom HT2100 seems to have issues with handling 8-bit tags. Mark it as broken. The pci_walk_bus() in the quirk handles devices we've enumerated in the past, and pci_configure_device() handles devices we enumerate in the future. Fixes: 60db3a4d8cc9 ("PCI: Enable PCIe Extended Tags if supported") Link: https://bugzilla.redhat.com/show_bug.cgi?id=1467674 Reported-and-tested-by: Wim ten Have Signed-off-by: Sinan Kaya [bhelgaas: changelog, tweak messages, rename bit and quirk] Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..3b968d435895 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -451,6 +451,7 @@ struct pci_host_bridge { void *release_data; struct msi_controller *msi; unsigned int ignore_reset_delay:1; /* for entire hierarchy */ + unsigned int no_ext_tags:1; /* no Extended Tags */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, -- cgit From e7942d0633c47c791ece6afa038be9cf977226de Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:18 +0200 Subject: tcp: remove prequeue support prequeue is a tcp receive optimization that moves part of rx processing from bh to process context. This only works if the socket being processed belongs to a process that is blocked in recv on that socket. In practice, this doesn't happen anymore that often because nowadays servers tend to use an event driven (epoll) model. Even normal client applications (web browsers) commonly use many tcp connections in parallel. This has measureable impact only in netperf (which uses plain recv and thus allows prequeue use) from host to locally running vm (~4%), however, there were no changes when using netperf between two physical hosts with ixgbe interfaces. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 --------- include/net/tcp.h | 11 ----------- 2 files changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 542ca1ae02c4..32fb37cfb0d1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -192,15 +192,6 @@ struct tcp_sock { struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - /* Data for direct copy to user */ - struct { - struct sk_buff_head prequeue; - struct task_struct *task; - struct msghdr *msg; - int memory; - int len; - } ucopy; - u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ u32 max_window; /* Maximal window ever seen from peer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 12d68335acd4..93f115cfc8f8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1244,17 +1244,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) __tcp_checksum_complete(skb); } -/* Prequeue for VJ style copy to user, combined with checksumming. */ - -static inline void tcp_prequeue_init(struct tcp_sock *tp) -{ - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - tp->ucopy.memory = 0; - skb_queue_head_init(&tp->ucopy.prequeue); -} - -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); -- cgit From b6690b14386698ce2c19309abad3f17656bdfaea Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:20 +0200 Subject: tcp: remove low_latency sysctl Was only checked by the removed prequeue code. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 93f115cfc8f8..8507c81fb0e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -256,7 +256,6 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; -extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -- cgit From 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:21 +0200 Subject: tcp: remove header prediction Like prequeue, I am not sure this is overly useful nowadays. If we receive a train of packets, GRO will aggregate them if the headers are the same (HP predates GRO by several years) so we don't get a per-packet benefit, only a per-aggregated-packet one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ------ include/net/tcp.h | 23 ----------------------- 2 files changed, 29 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32fb37cfb0d1..d7389ea36e10 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -147,12 +147,6 @@ struct tcp_sock { u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ -/* - * Header prediction flags - * 0x5?10 << 16 + snd_wnd in net byte order - */ - __be32 pred_flags; - /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8507c81fb0e9..8f11b82b5b5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -631,29 +631,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) -{ - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); -} - -static inline void tcp_fast_path_on(struct tcp_sock *tp) -{ - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); -} - -static inline void tcp_fast_path_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); -} - /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { -- cgit From 573aeb0492be3d0e5be9796a0c91abde794c1e36 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:22 +0200 Subject: tcp: remove CA_ACK_SLOWPATH re-indent tcp_ack, and remove CA_ACK_SLOWPATH; it is always set now. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f11b82b5b5a..3ecb62811004 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -880,9 +880,8 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ - CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ - CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ + CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ + CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ }; /* -- cgit From 3282e65558b3651e230ee985c174c35cb2fedaf1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jul 2017 03:57:23 +0200 Subject: tcp: remove unused mib counters was used by tcp prequeue and header prediction. TCPFORWARDRETRANS use was removed in january. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index d85693295798..b3f346fb9fe3 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,14 +184,7 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ - LINUX_MIB_TCPPREQUEUED, /* TCPPrequeued */ - LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, /* TCPDirectCopyFromBacklog */ - LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, /* TCPDirectCopyFromPrequeue */ - LINUX_MIB_TCPPREQUEUEDROPPED, /* TCPPrequeueDropped */ - LINUX_MIB_TCPHPHITS, /* TCPHPHits */ - LINUX_MIB_TCPHPHITSTOUSER, /* TCPHPHitsToUser */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ - LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ @@ -208,14 +201,12 @@ enum LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */ LINUX_MIB_TCPLOSSFAILURES, /* TCPLossFailures */ LINUX_MIB_TCPFASTRETRANS, /* TCPFastRetrans */ - LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */ LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */ LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */ LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */ LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */ LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */ LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */ - LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */ LINUX_MIB_TCPRCVCOLLAPSED, /* TCPRcvCollapsed */ LINUX_MIB_TCPDSACKOLDSENT, /* TCPDSACKOldSent */ LINUX_MIB_TCPDSACKOFOSENT, /* TCPDSACKOfoSent */ -- cgit From f248aff86d1fd6e60b656c2af278f1723c3b84c2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 31 Jul 2017 12:04:25 -0700 Subject: net: phy: mdio-bcm-unimac: Allow specifying platform data In preparation for having the bcmgenet driver migrate over the mdio-bcm-unimac driver, add a platform data structure which allows passing integrating specific details like bus name, wait function to complete MDIO operations and PHY mask. We also define what the platform device name contract is by defining UNIMAC_MDIO_DRV_NAME and moving it to the platform_data header. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/platform_data/mdio-bcm-unimac.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/platform_data/mdio-bcm-unimac.h (limited to 'include') diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h new file mode 100644 index 000000000000..8a5f9f0b2c52 --- /dev/null +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -0,0 +1,13 @@ +#ifndef __MDIO_BCM_UNIMAC_PDATA_H +#define __MDIO_BCM_UNIMAC_PDATA_H + +struct unimac_mdio_pdata { + u32 phy_mask; + int (*wait_func)(void *data); + void *wait_func_data; + const char *bus_name; +}; + +#define UNIMAC_MDIO_DRV_NAME "unimac-mdio" + +#endif /* __MDIO_BCM_UNIMAC_PDATA_H */ -- cgit From a823e4589e68996436d16db4ede9a43b646332f9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 28 Jul 2017 07:24:43 -0400 Subject: mm: add file_fdatawait_range and file_write_and_wait Necessary now for gfs2_fsync and sync_file_range, but there will eventually be other callers. Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- include/linux/fs.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 21e7df1ad613..af592ca3d509 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2544,6 +2544,8 @@ extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, + loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); @@ -2552,12 +2554,19 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping, extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); extern int filemap_check_errors(struct address_space *mapping); - extern void __filemap_set_wb_err(struct address_space *mapping, int err); + +extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, + loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); +static inline int file_write_and_wait(struct file *file) +{ + return file_write_and_wait_range(file, 0, LLONG_MAX); +} + /** * filemap_set_wb_err - set a writeback error on an address_space * @mapping: mapping in which to set writeback error -- cgit From f247037120ecd3dcbbc196b51ded8b57edf4904f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2017 00:19:05 +0800 Subject: f2fs: make max inline size changeable This patch tries to make below macros calculating max inline size, inline dentry field size considerring reserving size-changeable space: - MAX_INLINE_DATA - NR_INLINE_DENTRY - INLINE_DENTRY_BITMAP_SIZE - INLINE_RESERVED_SIZE Then, when inline_{data,dentry} options is enabled, it allows us to reserve inline space with different size flexibly for adding newly introduced inode attribute. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index b6feed6547ce..060b1c5b0836 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -206,9 +206,6 @@ struct f2fs_extent { #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ -#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ - F2FS_INLINE_XATTR_ADDRS - 1)) - struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ @@ -465,7 +462,7 @@ typedef __le32 f2fs_hash_t; #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) /* - * space utilization of regular dentry and inline dentry + * space utilization of regular dentry and inline dentry (w/o extra reservation) * regular dentry inline dentry * bitmap 1 * 27 = 27 1 * 23 = 23 * reserved 1 * 3 = 3 1 * 7 = 7 @@ -501,24 +498,6 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; -/* for inline dir */ -#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ - ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ - BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) -#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ - ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ - NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) - -/* inline directory entry structure */ -struct f2fs_inline_dentry { - __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; - __u8 reserved[INLINE_RESERVED_SIZE]; - struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; - __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; -} __packed; - /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, -- cgit From 7a2af766af15887754f7f7a0869b4603b390876a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 19 Jul 2017 00:19:06 +0800 Subject: f2fs: enhance on-disk inode structure scalability This patch add new flag F2FS_EXTRA_ATTR storing in inode.i_inline to indicate that on-disk structure of current inode is extended. In order to extend, we changed the inode structure a bit: Original one: struct f2fs_inode { ... struct f2fs_extent i_ext; __le32 i_addr[DEF_ADDRS_PER_INODE]; __le32 i_nid[DEF_NIDS_PER_INODE]; } Extended one: struct f2fs_inode { ... struct f2fs_extent i_ext; union { struct { __le16 i_extra_isize; __le16 i_padding; __le32 i_extra_end[0]; }; __le32 i_addr[DEF_ADDRS_PER_INODE]; }; __le32 i_nid[DEF_NIDS_PER_INODE]; } Once F2FS_EXTRA_ATTR is set, we will steal four bytes in the head of i_addr field for storing i_extra_isize and i_padding. with i_extra_isize, we can calculate actual size of reserved space in i_addr, available attribute fields included in total extra attribute fields for current inode can be described as below: +--------------------+ | .i_mode | | ... | | .i_ext | +--------------------+ | .i_extra_isize |-----+ | .i_padding | | | .i_prjid | | | .i_atime_extra | | | .i_ctime_extra | | | .i_mtime_extra |<----+ | .i_inode_cs |<----- store blkaddr/inline from here | .i_xattr_cs | | ... | +--------------------+ | | | block address | | | +--------------------+ | .i_nid | +--------------------+ | node_footer | | (nid, ino, offset) | +--------------------+ Hence, with this patch, we would enhance scalability of f2fs inode for storing more newly added attribute. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 060b1c5b0836..0c79ddd40b70 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -186,6 +186,8 @@ struct f2fs_extent { #define F2FS_NAME_LEN 255 #define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ + get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ #define ADDRS_PER_INODE(inode) addrs_per_inode(inode) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ @@ -205,6 +207,7 @@ struct f2fs_extent { #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -232,8 +235,14 @@ struct f2fs_inode { struct f2fs_extent i_ext; /* caching a largest extent */ - __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ - + union { + struct { + __le16 i_extra_isize; /* extra inode attribute size */ + __le16 i_padding; /* padding */ + __le32 i_extra_end[0]; /* for attribute size calculation */ + }; + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ + }; __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2), double_indirect(1) node id */ } __packed; -- cgit From 5c57132eaf5265937e46340bfbfb97ffb078c423 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Jul 2017 00:01:41 +0800 Subject: f2fs: support project quota This patch adds to support plain project quota. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 0c79ddd40b70..50f176683676 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -239,6 +239,7 @@ struct f2fs_inode { struct { __le16 i_extra_isize; /* extra inode attribute size */ __le16 i_padding; /* padding */ + __le32 i_projid; /* project id */ __le32 i_extra_end[0]; /* for attribute size calculation */ }; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ @@ -522,4 +523,6 @@ enum { #define S_SHIFT 12 +#define F2FS_DEF_PROJID 0 /* default project ID */ + #endif /* _LINUX_F2FS_FS_H */ -- cgit From bb7c19f96012720b895111300b9d9f3f858c3a69 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 28 Jul 2017 10:28:21 -0700 Subject: tcp: add related fields into SCM_TIMESTAMPING_OPT_STATS Add the following stats into SCM_TIMESTAMPING_OPT_STATS control msg: TCP_NLA_PACING_RATE TCP_NLA_DELIVERY_RATE TCP_NLA_SND_CWND TCP_NLA_REORDERING TCP_NLA_MIN_RTT TCP_NLA_RECUR_RETRANS TCP_NLA_DELIVERY_RATE_APP_LMT Signed-off-by: Wei Wang Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index a5507c977497..030e594bab45 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -231,6 +231,14 @@ enum { TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */ TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */ TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */ + TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */ + TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */ + TCP_NLA_SND_CWND, /* Sending congestion window */ + TCP_NLA_REORDERING, /* Reordering metric */ + TCP_NLA_MIN_RTT, /* minimum RTT */ + TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */ + TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */ + }; /* for TCP_MD5SIG socket option */ -- cgit From 6f68f0ac72087c29a94d1736324902e3ec07a4ec Mon Sep 17 00:00:00 2001 From: Binoy Jayan Date: Wed, 14 Jun 2017 12:59:51 +0530 Subject: HID: Remove the semaphore driver_lock The semaphore 'driver_lock' is used as a simple mutex, and also unnecessary as suggested by Arnd. Hence removing it, as the concurrency between the probe and remove is already handled in the driver core. Suggested-by: Arnd Bergmann Signed-off-by: Binoy Jayan Acked-by: Benjamin Tissoires Reviewed-by: David Herrmann Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5006f9b5d837..142409fc1ff3 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -526,7 +526,6 @@ struct hid_device { /* device report descriptor */ struct hid_report_enum report_enum[HID_REPORT_TYPES]; struct work_struct led_work; /* delayed LED worker */ - struct semaphore driver_lock; /* protects the current driver, except during input */ struct semaphore driver_input_lock; /* protects the current driver */ struct device dev; /* device */ struct hid_driver *driver; @@ -551,7 +550,7 @@ struct hid_device { /* device report descriptor */ unsigned int status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? */ unsigned quirks; /* Various quirks the device can pull on us */ - bool io_started; /* Protected by driver_lock. If IO has started */ + bool io_started; /* If IO has started */ struct list_head inputs; /* The list of inputs */ void *hiddev; /* The hiddev structure */ -- cgit From 99d8845e756cb91e2865f430401d084cd6a8ccc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 Jul 2017 14:40:49 +0200 Subject: ACPI / PM: Split acpi_device_wakeup() To prepare for a subsequent change and make the code somewhat easier to follow, do the following in the ACPI device wakeup handling code: * Replace wakeup.flags.enabled under struct acpi_device with wakeup.enable_count as that will be necessary going forward. For now, wakeup.enable_count is not allowed to grow beyond 1, so the current behavior is retained. * Split acpi_device_wakeup() into acpi_device_wakeup_enable() and acpi_device_wakeup_disable() and modify the callers of it accordingly. * Introduce a new acpi_wakeup_lock mutex to protect the wakeup enabling/disabling code from races in case it is executed more than once in parallel for the same device (which may happen for bridges theoretically). Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg --- include/acpi/acpi_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 68bc6be447fd..b7df95dbe7e9 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -316,7 +316,6 @@ struct acpi_device_perf { struct acpi_device_wakeup_flags { u8 valid:1; /* Can successfully enable wakeup? */ u8 notifier_present:1; /* Wake-up notify handler has been installed */ - u8 enabled:1; /* Enabled for wakeup */ }; struct acpi_device_wakeup_context { @@ -333,6 +332,7 @@ struct acpi_device_wakeup { struct acpi_device_wakeup_context context; struct wakeup_source *ws; int prepare_count; + int enable_count; }; struct acpi_device_physical_node { -- cgit From 1ba51a7c1496fd8f6d5bdd58dafcb1894275b7f0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 1 Aug 2017 02:56:18 +0200 Subject: ACPI / PCI / PM: Rework acpi_pci_propagate_wakeup() The acpi_pci_propagate_wakeup() routine is there to handle cases in which PCI bridges (or PCIe ports) are expected to signal wakeup for devices below them, but currently it doesn't do that correctly. The problem is that acpi_pci_propagate_wakeup() uses acpi_pm_set_device_wakeup() for bridges and if that routine is called for multiple times to disable wakeup for the same device, it will disable it on the first invocation and the next calls will have no effect (it works analogously when called to enable wakeup, but that is not a problem). Now, say acpi_pci_propagate_wakeup() has been called for two different devices under the same bridge and it has called acpi_pm_set_device_wakeup() for that bridge each time. The bridge is now enabled to generate wakeup signals. Next, suppose that one of the devices below it resumes and acpi_pci_propagate_wakeup() is called to disable wakeup for that device. It will then call acpi_pm_set_device_wakeup() for the bridge and that will effectively disable remote wakeup for all devices under it even though some of them may still be suspended and remote wakeup may be expected to work for them. To address this (arguably theoretical) issue, allow wakeup.enable_count under struct acpi_device to grow beyond 1 in certain situations. In particular, allow that to happen in acpi_pci_propagate_wakeup() when wakeup is enabled or disabled for PCI bridges, so that wakeup is actually disabled for the bridge when all devices under it resume and not when just one of them does that. Signed-off-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas --- include/acpi/acpi_bus.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b7df95dbe7e9..de7e8ac9d806 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -606,6 +606,7 @@ acpi_status acpi_remove_pm_notifier(struct acpi_device *adev); bool acpi_pm_device_can_wakeup(struct device *dev); int acpi_pm_device_sleep_state(struct device *, int *, int); int acpi_pm_set_device_wakeup(struct device *dev, bool enable); +int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable); #else static inline void acpi_pm_wakeup_event(struct device *dev) { @@ -636,6 +637,10 @@ static inline int acpi_pm_set_device_wakeup(struct device *dev, bool enable) { return -ENODEV; } +static inline int acpi_pm_set_bridge_wakeup(struct device *dev, bool enable) +{ + return -ENODEV; +} #endif #ifdef CONFIG_ACPI_SLEEP -- cgit From 674e75411fc260b0d4532701228cfe12fc090da8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:38 +0530 Subject: sched: cpufreq: Allow remote cpufreq callbacks With Android UI and benchmarks the latency of cpufreq response to certain scheduling events can become very critical. Currently, callbacks into cpufreq governors are only made from the scheduler if the target CPU of the event is the same as the current CPU. This means there are certain situations where a target CPU may not run the cpufreq governor for some time. One testcase to show this behavior is where a task starts running on CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the system is configured such that the new tasks should receive maximum demand initially, this should result in CPU0 increasing frequency immediately. But because of the above mentioned limitation though, this does not occur. This patch updates the scheduler core to call the cpufreq callbacks for remote CPUs as well. The schedutil, ondemand and conservative governors are updated to process cpufreq utilization update hooks called for remote CPUs where the remote CPU is managed by the cpufreq policy of the local CPU. The intel_pstate driver is updated to always reject remote callbacks. This is tested with couple of usecases (Android: hackbench, recentfling, galleryfling, vellamo, Ubuntu: hackbench) on ARM hikey board (64 bit octa-core, single policy). Only galleryfling showed minor improvements, while others didn't had much deviation. The reason being that this patch only targets a corner case, where following are required to be true to improve performance and that doesn't happen too often with these tests: - Task is migrated to another CPU. - The task has high demand, and should take the target CPU to higher OPPs. - And the target CPU doesn't call into the cpufreq governor until the next tick. Based on initial work from Steve Muckle. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f10a9b3761cd..c4035964e6b3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -562,6 +562,15 @@ struct governor_attr { size_t count); }; +static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) +{ + /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ + if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + return true; + + return false; +} + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ -- cgit From 99d14d0e16fadb4de001bacc0ac0786a9ebecf2a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:39 +0530 Subject: cpufreq: Process remote callbacks from any CPU if the platform permits On many platforms, CPUs can do DVFS across cpufreq policies. i.e CPU from policy-A can change frequency of CPUs belonging to policy-B. This is quite common in case of ARM platforms where we don't configure any per-cpu register. Add a flag to identify such platforms and update cpufreq_can_do_remote_dvfs() to allow remote callbacks if this flag is set. Also enable the flag for cpufreq-dt driver which is used only on ARM platforms currently. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c4035964e6b3..1981a4a167ce 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,6 +127,15 @@ struct cpufreq_policy { */ unsigned int transition_delay_us; + /* + * Remote DVFS flag (Not added to the driver structure as we don't want + * to access another structure from scheduler hotpath). + * + * Should be set if CPUs can do DVFS on behalf of other CPUs from + * different cpufreq policies. + */ + bool dvfs_possible_from_any_cpu; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; @@ -564,8 +573,13 @@ struct governor_attr { static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) { - /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ - if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + /* + * Allow remote callbacks if: + * - dvfs_possible_from_any_cpu flag is set + * - the local and remote CPUs share cpufreq policy + */ + if (policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus)) return true; return false; -- cgit From bc2eecd7ecce40af43b6eb3d256b6076257df846 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Aug 2017 00:31:32 -0400 Subject: futex: Allow for compiling out PI support This makes it possible to preserve basic futex support and compile out the PI support when RT mutexes are not available. Signed-off-by: Nicolas Pitre Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Darren Hart Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708010024190.5981@knanqh.ubzr --- include/linux/futex.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 7c5b694864cd..f36bfd26f998 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -54,7 +54,6 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); -extern void exit_pi_state_list(struct task_struct *curr); #ifdef CONFIG_HAVE_FUTEX_CMPXCHG #define futex_cmpxchg_enabled 1 #else @@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled; static inline void exit_robust_list(struct task_struct *curr) { } +#endif + +#ifdef CONFIG_FUTEX_PI +extern void exit_pi_state_list(struct task_struct *curr); +#else static inline void exit_pi_state_list(struct task_struct *curr) { } #endif + #endif -- cgit From ffb959bbdf923b4f89a08a04aba2501b1b16d164 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 31 Jul 2017 10:29:38 -0400 Subject: mm: remove optimizations based on i_size in mapping writeback waits Marcelo added this i_size based optimization with a patch in 2004 (commitid is from the linux-history tree): commit 765dad09b4ac101a32d87af2bb793c3060497d3c Author: Marcelo Tosatti Date: Tue Sep 7 17:51:17 2004 -0700 small wait_on_page_writeback_range() optimization filemap_fdatawait() calls wait_on_page_writeback_range() with -1 as "end" parameter. This is not needed since we know the EOF from the inode. Use that instead. There may be races here, particularly with clustered or network filesystems. It also seems like a bit of a layering violation since we're operating on an address_space here, not an inode. Finally, it's also questionable whether this optimization really helps on workloads that we care about. Should we be optimizing for writeback vs. truncate races in a codepath where we expect to wait anyway? It doesn't seem worth the risk. Remove this optimization from the filemap_fdatawait codepaths. This means that filemap_fdatawait becomes a trivial wrapper around filemap_fdatawait_range. Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index af592ca3d509..909210bd6366 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2538,10 +2538,15 @@ extern int invalidate_inode_pages2_range(struct address_space *mapping, extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); -extern int filemap_fdatawait(struct address_space *); extern int filemap_fdatawait_keep_errors(struct address_space *mapping); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); + +static inline int filemap_fdatawait(struct address_space *mapping) +{ + return filemap_fdatawait_range(mapping, 0, LLONG_MAX); +} + extern bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, -- cgit From 6104c37094e729f3d4ce65797002112735d49cd1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 1 Aug 2017 17:32:07 +0200 Subject: fbcon: Make fbcon a built-time depency for fbdev There's a bunch of folks who're trying to make printk less contended and faster, but there's a problem: printk uses the console_lock, and the console lock has become the BKL for all things fbdev/fbcon, which in turn pulled in half the drm subsystem under that lock. That's awkward. There reasons for that is probably just a historical accident: - fbcon is a runtime option of fbdev, i.e. at runtime you can pick whether your fbdev driver instances are used as kernel consoles. Unfortunately this wasn't implemented with some module option, but through some module loading magic: As long as you don't load fbcon.ko, there's no fbdev console support, but loading it (in any order wrt fbdev drivers) will create console instances for all fbdev drivers. - This was implemented through a notifier chain. fbcon.ko enumerates all fbdev instances at load time and also registers itself as listener in the fbdev notifier. The fbdev core tries to register new fbdev instances with fbcon using the notifier. - On top of that the modifier chain is also used at runtime by the fbdev subsystem to e.g. control backlights for panels. - The problem is that the notifier puts a mutex locking context between fbdev and fbcon, which mixes up the locking contexts for both the runtime usage and the register time usage to notify fbcon. And at runtime fbcon (through the fbdev core) might call into the notifier from a printk critical section while console_lock is held. - This means console_lock must be an outer lock for the entire fbdev subsystem, which also means it must be acquired when registering a new framebuffer driver as the outermost lock since we might call into fbcon (through the notifier) which would result in a locking inversion if fbcon would acquire the console_lock from its notifier callback (which it needs to register the console). - console_lock can be held anywhere, since printk can be called anywhere, and through the above story, plus drm/kms being an fbdev driver, we pull in a shocking amount of locking hiercharchy underneath the console_lock. Which makes cleaning up printk really hard (not even splitting console_lock into an rwsem is all that useful due to this). There's various ways to address this, but the cleanest would be to make fbcon a compile-time option, where fbdev directly calls the fbcon register functions from register_framebuffer, or dummy static inline versions if fbcon is disabled. Maybe augmented with a runtime knob to disable fbcon, if that's needed (for debugging perhaps). But this could break some users who rely on the magic "loading fbcon.ko enables/disables fbdev framebuffers at runtime" thing, even if that's unlikely. Hence we must be careful: 1. Create a compile-time dependency between fbcon and fbdev in the least minimal way. This is what this patch does. 2. Wait at least 1 year to give possible users time to scream about how we broke their setup. Unlikely, since all distros make fbcon compile-in, and embedded platforms only compile stuff they know they need anyway. But still. 3. Convert the notifier to direct functions calls, with dummy static inlines if fbcon is disabled. We'll still need the fb notifier for the other uses (like backlights), but we can probably move it into the fb core (atm it must be built-into vmlinux). 4. Push console_lock down the call-chain, until it is down in console_register again. 5. Finally start to clean up and rework the printk/console locking. For context of this saga see commit 50e244cc793d511b86adea24972f3a7264cae114 Author: Alan Cox Date: Fri Jan 25 10:28:15 2013 +1000 fb: rework locking to fix lock ordering on takeover plus the pile of commits on top that tried to make this all work without terminally upsetting lockdep. We've uncovered all this when console_lock lockdep annotations where added in commit daee779718a319ff9f83e1ba3339334ac650bb22 Author: Daniel Vetter Date: Sat Sep 22 19:52:11 2012 +0200 console: implement lockdep support for console_lock On the patch itself: - Switch CONFIG_FRAMEBUFFER_CONSOLE to be a boolean, using the overall CONFIG_FB tristate to decided whether it should be a module or built-in. - At first I thought I could force the build depency with just a dummy symbol that fbcon.ko exports and fb.ko uses. But that leads to a module depency cycle (it works fine when built-in). Since this tight binding is the entire goal the simplest solution is to move all the fbcon modules (and there's a bunch of optinal source-files which are each modules of their own, for no good reason) into the overall fb.ko core module. That's a bit more than what I would have liked to do in this patch, but oh well. Signed-off-by: Daniel Vetter Cc: Alan Cox Cc: Sergey Senozhatsky Cc: Steven Rostedt Reviewed-by: Sean Paul Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/fbcon.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/fbcon.h (limited to 'include') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h new file mode 100644 index 000000000000..0fac6305d51c --- /dev/null +++ b/include/linux/fbcon.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_FBCON_H +#define _LINUX_FBCON_H + +#ifdef CONFIG_FRAMEBUFFER_CONSOLE +void __init fb_console_init(void); +void __exit fb_console_exit(void); +#else +static void fb_console_init(void) {} +static void fb_console_exit(void) {} +#endif + +#endif /* _LINUX_FBCON_H */ -- cgit From 376b3ff54c9a1cd54d71b1102e061fccf56d535e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 1 Aug 2017 17:33:02 +0200 Subject: fbdev: Nuke FBINFO_MODULE Instead check info->ops->owner, which amounts to the same. Spotted because I want to remove the pile of broken and cargo-culted fb_info->flags assignments in drm drivers. Signed-off-by: Daniel Vetter Reviewed-by: Sean Paul Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/fb.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index a964d076b4dc..f4386b0ccf40 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -400,7 +400,7 @@ struct fb_tile_ops { #endif /* CONFIG_FB_TILEBLITTING */ /* FBINFO_* = fb_info.flags bit flags */ -#define FBINFO_MODULE 0x0001 /* Low-level driver is a module */ +#define FBINFO_DEFAULT 0 #define FBINFO_HWACCEL_DISABLED 0x0002 /* When FBINFO_HWACCEL_DISABLED is set: * Hardware acceleration is turned off. Software implementations @@ -533,14 +533,6 @@ static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { return a; } -#ifdef MODULE -#define FBINFO_DEFAULT FBINFO_MODULE -#else -#define FBINFO_DEFAULT 0 -#endif - -// This will go away -#define FBINFO_FLAG_MODULE FBINFO_MODULE #define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT /* This will go away -- cgit From ddb4a1442def2a78b91a85b4251fb712ef23662b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jul 2017 15:25:23 -0700 Subject: exec: Rename bprm->cred_prepared to called_set_creds The cred_prepared bprm flag has a misleading name. It has nothing to do with the bprm_prepare_cred hook, and actually tracks if bprm_set_creds has been called. Rename this flag and improve its comment. Cc: David Howells Cc: Stephen Smalley Cc: Casey Schaufler Signed-off-by: Kees Cook Acked-by: John Johansen Acked-by: James Morris Acked-by: Paul Moore Acked-by: Serge Hallyn --- include/linux/binfmts.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 3ae9013eeaaa..9023e1d2d5cd 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -25,8 +25,12 @@ struct linux_binprm { struct mm_struct *mm; unsigned long p; /* current top of mem */ unsigned int - cred_prepared:1,/* true if creds already prepared (multiple - * preps happen for interpreters) */ + /* + * True after the bprm_set_creds hook has been called once + * (multiple calls can be made via prepare_binprm() for + * binfmt_script/misc). + */ + called_set_creds:1, cap_effective:1;/* true if has elevated effective capabilities, * false if not; except for init which inherits * its parent's caps anyway */ -- cgit From c425e189ffd7720c881fe9ccd7143cea577f6d03 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jul 2017 15:25:22 -0700 Subject: binfmt: Introduce secureexec flag The bprm_secureexec hook can be moved earlier. Right now, it is called during create_elf_tables(), via load_binary(), via search_binary_handler(), via exec_binprm(). Nearly all (see exception below) state used by bprm_secureexec is created during the bprm_set_creds hook, called from prepare_binprm(). For all LSMs (except commoncaps described next), only the first execution of bprm_set_creds takes any effect (they all check bprm->called_set_creds which prepare_binprm() sets after the first call to the bprm_set_creds hook). However, all these LSMs also only do anything with bprm_secureexec when they detected a secure state during their first run of bprm_set_creds. Therefore, it is functionally identical to move the detection into bprm_set_creds, since the results from secureexec here only need to be based on the first call to the LSM's bprm_set_creds hook. The single exception is that the commoncaps secureexec hook also examines euid/uid and egid/gid differences which are controlled by bprm_fill_uid(), via prepare_binprm(), which can be called multiple times (e.g. binfmt_script, binfmt_misc), and may clear the euid/egid for the final load (i.e. the script interpreter). However, while commoncaps specifically ignores bprm->cred_prepared, and runs its bprm_set_creds hook each time prepare_binprm() may get called, it needs to base the secureexec decision on the final call to bprm_set_creds. As a result, it will need special handling. To begin this refactoring, this adds the secureexec flag to the bprm struct, and calls the secureexec hook during setup_new_exec(). This is safe since all the cred work is finished (and past the point of no return). This explicit call will be removed in later patches once the hook has been removed. Cc: David Howells Signed-off-by: Kees Cook Reviewed-by: John Johansen Acked-by: Serge Hallyn Reviewed-by: James Morris --- include/linux/binfmts.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 9023e1d2d5cd..16838ba7ee75 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -31,9 +31,15 @@ struct linux_binprm { * binfmt_script/misc). */ called_set_creds:1, - cap_effective:1;/* true if has elevated effective capabilities, + cap_effective:1,/* true if has elevated effective capabilities, * false if not; except for init which inherits * its parent's caps anyway */ + /* + * Set by bprm_set_creds hook to indicate a privilege-gaining + * exec has happened. Used to sanitize execution environment + * and to set AT_SECURE auxv for glibc. + */ + secureexec:1; #ifdef __alpha__ unsigned int taso:1; #endif -- cgit From 46d98eb4e1d2bc225f661879e0e157a952107598 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jul 2017 15:25:27 -0700 Subject: commoncap: Refactor to remove bprm_secureexec hook The commoncap implementation of the bprm_secureexec hook is the only LSM that depends on the final call to its bprm_set_creds hook (since it may be called for multiple files, it ignores bprm->called_set_creds). As a result, it cannot safely _clear_ bprm->secureexec since other LSMs may have set it. Instead, remove the bprm_secureexec hook by introducing a new flag to bprm specific to commoncap: cap_elevated. This is similar to cap_effective, but that is used for a specific subset of elevated privileges, and exists solely to track state from bprm_set_creds to bprm_secureexec. As such, it will be removed in the next patch. Here, set the new bprm->cap_elevated flag when setuid/setgid has happened from bprm_fill_uid() or fscapabilities have been prepared. This temporarily moves the bprm_secureexec hook to a static inline. The helper will be removed in the next patch; this makes the step easier to review and bisect, since this does not introduce any changes to inputs nor outputs to the "elevated privileges" calculation. The new flag is merged with the bprm->secureexec flag in setup_new_exec() since this marks the end of any further prepare_binprm() calls. Cc: Andy Lutomirski Signed-off-by: Kees Cook Reviewed-by: Andy Lutomirski Acked-by: James Morris Acked-by: Serge Hallyn --- include/linux/binfmts.h | 7 +++++++ include/linux/security.h | 3 +-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 16838ba7ee75..213c61fa3780 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -34,6 +34,13 @@ struct linux_binprm { cap_effective:1,/* true if has elevated effective capabilities, * false if not; except for init which inherits * its parent's caps anyway */ + /* + * True if most recent call to the commoncaps bprm_set_creds + * hook (due to multiple prepare_binprm() calls from the + * binfmt_script/misc handlers) resulted in elevated + * privileges. + */ + cap_elevated:1, /* * Set by bprm_set_creds hook to indicate a privilege-gaining * exec has happened. Used to sanitize execution environment diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..f89832ccdf55 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -85,7 +85,6 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); extern int cap_bprm_set_creds(struct linux_binprm *bprm); -extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, const char *name); @@ -543,7 +542,7 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm) static inline int security_bprm_secureexec(struct linux_binprm *bprm) { - return cap_bprm_secureexec(bprm); + return 0; } static inline int security_sb_alloc(struct super_block *sb) -- cgit From ee67ae7ef6ff499137292ac8a9dfe86096796283 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jul 2017 15:25:28 -0700 Subject: commoncap: Move cap_elevated calculation into bprm_set_creds Instead of a separate function, open-code the cap_elevated test, which lets us entirely remove bprm->cap_effective (to use the local "effective" variable instead), and more accurately examine euid/egid changes via the existing local "is_setid". The following LTP tests were run to validate the changes: # ./runltp -f syscalls -s cap # ./runltp -f securebits # ./runltp -f cap_bounds # ./runltp -f filecaps All kernel selftests for capabilities and exec continue to pass as well. Signed-off-by: Kees Cook Reviewed-by: James Morris Acked-by: Serge Hallyn Reviewed-by: Andy Lutomirski --- include/linux/binfmts.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 213c61fa3780..fb44d6180ca0 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -31,9 +31,6 @@ struct linux_binprm { * binfmt_script/misc). */ called_set_creds:1, - cap_effective:1,/* true if has elevated effective capabilities, - * false if not; except for init which inherits - * its parent's caps anyway */ /* * True if most recent call to the commoncaps bprm_set_creds * hook (due to multiple prepare_binprm() calls from the -- cgit From 2af622802696e1dbe28d81c8ea6355dc30800396 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Jul 2017 15:25:29 -0700 Subject: LSM: drop bprm_secureexec hook This removes the bprm_secureexec hook since the logic has been folded into the bprm_set_creds hook for all LSMs now. Cc: Eric W. Biederman Signed-off-by: Kees Cook Reviewed-by: John Johansen Acked-by: James Morris Acked-by: Serge Hallyn --- include/linux/lsm_hooks.h | 14 +++++--------- include/linux/security.h | 6 ------ 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3a90febadbe2..d1c7bef25691 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -43,7 +43,11 @@ * interpreters. The hook can tell whether it has already been called by * checking to see if @bprm->security is non-NULL. If so, then the hook * may decide either to retain the security information saved earlier or - * to replace it. + * to replace it. The hook must set @bprm->secureexec to 1 if a "secure + * exec" has happened as a result of this hook call. The flag is used to + * indicate the need for a sanitized execution environment, and is also + * passed in the ELF auxiliary table on the initial stack to indicate + * whether libc should enable secure mode. * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. * @bprm_check_security: @@ -71,12 +75,6 @@ * linux_binprm structure. This hook is a good place to perform state * changes on the process such as clearing out non-inheritable signal * state. This is called immediately after commit_creds(). - * @bprm_secureexec: - * Return a boolean value (0 or 1) indicating whether a "secure exec" - * is required. The flag is passed in the auxiliary table - * on the initial stack to the ELF interpreter to indicate whether libc - * should enable secure mode. - * @bprm contains the linux_binprm structure. * * Security hooks for filesystem operations. * @@ -1388,7 +1386,6 @@ union security_list_options { int (*bprm_set_creds)(struct linux_binprm *bprm); int (*bprm_check_security)(struct linux_binprm *bprm); - int (*bprm_secureexec)(struct linux_binprm *bprm); void (*bprm_committing_creds)(struct linux_binprm *bprm); void (*bprm_committed_creds)(struct linux_binprm *bprm); @@ -1710,7 +1707,6 @@ struct security_hook_heads { struct list_head vm_enough_memory; struct list_head bprm_set_creds; struct list_head bprm_check_security; - struct list_head bprm_secureexec; struct list_head bprm_committing_creds; struct list_head bprm_committed_creds; struct list_head sb_alloc_security; diff --git a/include/linux/security.h b/include/linux/security.h index f89832ccdf55..974bb9b0996c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -231,7 +231,6 @@ int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); void security_bprm_committing_creds(struct linux_binprm *bprm); void security_bprm_committed_creds(struct linux_binprm *bprm); -int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); @@ -540,11 +539,6 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm) { } -static inline int security_bprm_secureexec(struct linux_binprm *bprm) -{ - return 0; -} - static inline int security_sb_alloc(struct super_block *sb) { return 0; -- cgit From c4b3eacc1dfef5f36dbdf9a99be37834d3e23ed0 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 17 Jul 2017 17:54:07 +0200 Subject: mtd: spi-nor: Recover from Spansion/Cypress errors S25FL{128|256|512}S datasheets say: "When P_ERR or E_ERR bits are set to one, the WIP bit will remain set to one indicating the device remains busy and unable to receive new operation commands. A Clear Status Register (CLSR) command must be received to return the device to standby mode." Current spi-nor code works until first error occurs, but write/erase errors are not just rare hardware failures, they also occur if user tries to flash write-protected areas. After such attempt no SPI command can be executed any more and even read fails. This patch adds support for P_ERR and E_ERR bits in Status Register 1 (so that operation fails immediately and not after a long timeout) and proper recovery from the error condition. Tested on Spansion S25FS128S, which is supported by S25FL129P entry. Signed-off-by: Alexander Sverdlin Signed-off-by: Cyrille Pitchen --- include/linux/mtd/spi-nor.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 0df3638ff0b8..1f0a7fc7772f 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -105,6 +105,7 @@ /* Used for Spansion flashes only. */ #define SPINOR_OP_BRWR 0x17 /* Bank register write */ +#define SPINOR_OP_CLSR 0x30 /* Clear status register 1 */ /* Used for Micron flashes only. */ #define SPINOR_OP_RD_EVCR 0x65 /* Read EVCR register */ @@ -119,6 +120,9 @@ #define SR_BP2 BIT(4) /* Block protect 2 */ #define SR_TB BIT(5) /* Top/Bottom protect */ #define SR_SRWD BIT(7) /* SR write protect */ +/* Spansion/Cypress specific status bits */ +#define SR_E_ERR BIT(5) +#define SR_P_ERR BIT(6) #define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */ @@ -224,6 +228,7 @@ enum spi_nor_option_flags { SNOR_F_NO_OP_CHIP_ERASE = BIT(2), SNOR_F_S3AN_ADDR_DEFAULT = BIT(3), SNOR_F_READY_XSR_RDY = BIT(4), + SNOR_F_USE_CLSR = BIT(5), }; /** -- cgit From d31ae2548142b7cd12404929ef3a13ae27c9d961 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 12:00:39 -0400 Subject: sunrpc: Const-ify all instances of struct rpc_xprt_ops After transport instance creation, these function pointers never change. Mark them as constant to prevent their use as an attack vector for code injections. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eab1c749e192..f60c55ca2e2e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -174,7 +174,7 @@ enum xprt_transports { struct rpc_xprt { struct kref kref; /* Reference count */ - struct rpc_xprt_ops * ops; /* transport methods */ + const struct rpc_xprt_ops *ops; /* transport methods */ const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ -- cgit From c39a0e2c8850f08249383f2425dbd8dbe4baad69 Mon Sep 17 00:00:00 2001 From: Vikas Shivappa Date: Tue, 25 Jul 2017 14:14:20 -0700 Subject: x86/perf/cqm: Wipe out perf based cqm 'perf cqm' never worked due to the incompatibility between perf infrastructure and cqm hardware support. The hardware uses RMIDs to track the llc occupancy of tasks and these RMIDs are per package. This makes monitoring a hierarchy like cgroup along with monitoring of tasks separately difficult and several patches sent to lkml to fix them were NACKed. Further more, the following issues in the current perf cqm make it almost unusable: 1. No support to monitor the same group of tasks for which we do allocation using resctrl. 2. It gives random and inaccurate data (mostly 0s) once we run out of RMIDs due to issues in Recycling. 3. Recycling results in inaccuracy of data because we cannot guarantee that the RMID was stolen from a task when it was not pulling data into cache or even when it pulled the least data. Also for monitoring llc_occupancy, if we stop using an RMID_x and then start using an RMID_y after we reclaim an RMID from an other event, we miss accounting all the occupancy that was tagged to RMID_x at a later perf_count. 2. Recycling code makes the monitoring code complex including scheduling because the event can lose RMID any time. Since MBM counters count bandwidth for a period of time by taking snap shot of total bytes at two different times, recycling complicates the way we count MBM in a hierarchy. Also we need a spin lock while we do the processing to account for MBM counter overflow. We also currently use a spin lock in scheduling to prevent the RMID from being taken away. 4. Lack of support when we run different kind of event like task, system-wide and cgroup events together. Data mostly prints 0s. This is also because we can have only one RMID tied to a cpu as defined by the cqm hardware but a perf can at the same time tie multiple events during one sched_in. 5. No support of monitoring a group of tasks. There is partial support for cgroup but it does not work once there is a hierarchy of cgroups or if we want to monitor a task in a cgroup and the cgroup itself. 6. No support for monitoring tasks for the lifetime without perf overhead. 7. It reported the aggregate cache occupancy or memory bandwidth over all sockets. But most cloud and VMM based use cases want to know the individual per-socket usage. Signed-off-by: Vikas Shivappa Signed-off-by: Thomas Gleixner Cc: ravi.v.shankar@intel.com Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: vikas.shivappa@intel.com Cc: ak@linux.intel.com Cc: davidcc@google.com Cc: reinette.chatre@intel.com Link: http://lkml.kernel.org/r/1501017287-28083-2-git-send-email-vikas.shivappa@linux.intel.com --- include/linux/perf_event.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873fc59e4..4572dbabd4e8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -139,14 +139,6 @@ struct hw_perf_event { /* for tp_event->class */ struct list_head tp_list; }; - struct { /* intel_cqm */ - int cqm_state; - u32 cqm_rmid; - int is_group_event; - struct list_head cqm_events_entry; - struct list_head cqm_groups_entry; - struct list_head cqm_group_entry; - }; struct { /* itrace */ int itrace_started; }; @@ -416,11 +408,6 @@ struct pmu { size_t task_ctx_size; - /* - * Return the count value for a counter. - */ - u64 (*count) (struct perf_event *event); /*optional*/ - /* * Set up pmu-private data structures for an AUX area */ @@ -1111,11 +1098,6 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, __perf_event_task_sched_out(prev, next); } -static inline u64 __perf_event_count(struct perf_event *event) -{ - return local64_read(&event->count) + atomic64_read(&event->child_count); -} - extern void perf_event_mmap(struct vm_area_struct *vma); extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -- cgit From f01d7d51f577b5dc0fa5919ab8a9228e2bf49f3e Mon Sep 17 00:00:00 2001 From: Vikas Shivappa Date: Tue, 25 Jul 2017 14:14:22 -0700 Subject: x86/intel_rdt: Introduce a common compile option for RDT We currently have a CONFIG_RDT_A which is for RDT(Resource directory technology) allocation based resctrl filesystem interface. As a preparation to add support for RDT monitoring as well into the same resctrl filesystem, change the config option to be CONFIG_RDT which would include both RDT allocation and monitoring code. No functional change. Signed-off-by: Vikas Shivappa Signed-off-by: Thomas Gleixner Cc: ravi.v.shankar@intel.com Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: vikas.shivappa@intel.com Cc: ak@linux.intel.com Cc: davidcc@google.com Cc: reinette.chatre@intel.com Link: http://lkml.kernel.org/r/1501017287-28083-4-git-send-email-vikas.shivappa@linux.intel.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..20b2ff2f4fde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -898,7 +898,7 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif -#ifdef CONFIG_INTEL_RDT_A +#ifdef CONFIG_INTEL_RDT int closid; #endif #ifdef CONFIG_FUTEX -- cgit From 0734ded1abee9439b0c5d7b62af1ead78aab895b Mon Sep 17 00:00:00 2001 From: Vikas Shivappa Date: Tue, 25 Jul 2017 14:14:33 -0700 Subject: x86/intel_rdt: Change closid type from int to u32 OS associates a CLOSid(Class of service id) to a task by writing the high 32 bits of per CPU IA32_PQR_ASSOC MSR when a task is scheduled in. CPUID.(EAX=10H, ECX=1):EDX[15:0] enumerates the max CLOSID supported and it is zero indexed. Hence change the type to u32 from int. Signed-off-by: Vikas Shivappa Signed-off-by: Thomas Gleixner Cc: ravi.v.shankar@intel.com Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: vikas.shivappa@intel.com Cc: ak@linux.intel.com Cc: davidcc@google.com Cc: reinette.chatre@intel.com Link: http://lkml.kernel.org/r/1501017287-28083-15-git-send-email-vikas.shivappa@linux.intel.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 20b2ff2f4fde..8839fd09540c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -899,7 +899,7 @@ struct task_struct { struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT - int closid; + u32 closid; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; -- cgit From d6aaba615a482ce7d3ec218cf7b8d02d0d5753b8 Mon Sep 17 00:00:00 2001 From: Vikas Shivappa Date: Tue, 25 Jul 2017 14:14:34 -0700 Subject: x86/intel_rdt/cqm: Add tasks file support The root directory, ctrl_mon and monitor groups are populated with a read/write file named "tasks". When read, it shows all the task IDs assigned to the resource group. Tasks can be added to groups by writing the PID to the file. A task can be present in one "ctrl_mon" group "and" one "monitor" group. IOW a PID_x can be seen in a ctrl_mon group and a monitor group at the same time. When a task is added to a ctrl_mon group, it is automatically removed from the previous ctrl_mon group where it belonged. Similarly if a task is moved to a monitor group it is removed from the previous monitor group . Also since the monitor groups can only have subset of tasks of parent ctrl_mon group, a task can be moved to a monitor group only if its already present in the parent ctrl_mon group. Task membership is indicated by a new field in the task_struct "u32 rmid" which holds the RMID for the task. RMID=0 is reserved for the default root group where the tasks belong to at mount. [tony: zero the rmid if rdtgroup was deleted when task was being moved] Signed-off-by: Tony Luck Signed-off-by: Vikas Shivappa Signed-off-by: Thomas Gleixner Cc: ravi.v.shankar@intel.com Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: vikas.shivappa@intel.com Cc: ak@linux.intel.com Cc: davidcc@google.com Cc: reinette.chatre@intel.com Link: http://lkml.kernel.org/r/1501017287-28083-16-git-send-email-vikas.shivappa@linux.intel.com --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8839fd09540c..067a41ac5fd4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -900,6 +900,7 @@ struct task_struct { #endif #ifdef CONFIG_INTEL_RDT u32 closid; + u32 rmid; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; -- cgit From 306b13eb3cf9515a8214bbf5d69d811371d05792 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:41 -0700 Subject: proto_ops: Add locked held versions of sendmsg and sendpage Add new proto_ops sendmsg_locked and sendpage_locked that can be called when the socket lock is already held. Correspondingly, add kernel_sendmsg_locked and kernel_sendpage_locked as front end functions. These functions will be used in zero proxy so that we can take the socket lock in a ULP sendmsg/sendpage and then directly call the backend transport proto_ops functions. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/net.h | 12 ++++++++++++ include/net/sock.h | 3 +++ include/net/tcp.h | 3 +++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index dda2cc939a53..b5c15b31709b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -190,8 +190,16 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + + /* The following functions are called internally by kernel with + * sock lock already held. + */ int (*read_sock)(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); + int (*sendpage_locked)(struct sock *sk, struct page *page, + int offset, size_t size, int flags); + int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, + size_t size); }; #define DECLARE_SOCKADDR(type, dst, src) \ @@ -279,6 +287,8 @@ do { \ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); +int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, + struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); @@ -297,6 +307,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..393c38e9f6aa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1582,11 +1582,14 @@ int sock_no_shutdown(struct socket *, int); int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *); int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int); int sock_no_sendmsg(struct socket *, struct msghdr *, size_t); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol diff --git a/include/net/tcp.h b/include/net/tcp.h index 3ecb62811004..bb1881b4ce48 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -350,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); -- cgit From 20bf50de3028cb15fa81e1d1e63ab6e0c85257fc Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:42 -0700 Subject: skbuff: Function to send an skbuf on a socket Add skb_send_sock to send an skbuff on a socket within the kernel. Arguments include an offset so that an skbuf might be sent in mulitple calls (e.g. send buffer limit is hit). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4093552be1de..18e76bf9574e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3113,6 +3113,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len); +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, -- cgit From bbb03029a899679d73e62d7e6ae80348cc5d0054 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 28 Jul 2017 16:22:43 -0700 Subject: strparser: Generalize strparser Generalize strparser from more than just being used in conjunction with read_sock. strparser will also be used in the send path with zero proxy. The primary change is to create strp_process function that performs the critical processing on skbs. The documentation is also updated to reflect the new uses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 119 +++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/net/strparser.h b/include/net/strparser.h index 0c28ad97c52f..4fe966a0ad92 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -18,26 +18,26 @@ #define STRP_STATS_INCR(stat) ((stat)++) struct strp_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; }; struct strp_aggr_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; - unsigned int rx_aborts; - unsigned int rx_interrupted; - unsigned int rx_unrecov_intr; + unsigned long long msgs; + unsigned long long bytes; + unsigned int mem_fail; + unsigned int need_more_hdr; + unsigned int msg_too_big; + unsigned int msg_timeouts; + unsigned int bad_hdr_len; + unsigned int aborts; + unsigned int interrupted; + unsigned int unrecov_intr; }; struct strparser; @@ -48,16 +48,18 @@ struct strp_callbacks { void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); + void (*lock)(struct strparser *strp); + void (*unlock)(struct strparser *strp); }; -struct strp_rx_msg { +struct strp_msg { int full_len; int offset; }; -static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +static inline struct strp_msg *strp_msg(struct sk_buff *skb) { - return (struct strp_rx_msg *)((void *)skb->cb + + return (struct strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)); } @@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) struct strparser { struct sock *sk; - u32 rx_stopped : 1; - u32 rx_paused : 1; - u32 rx_aborted : 1; - u32 rx_interrupted : 1; - u32 rx_unrecov_intr : 1; - - struct sk_buff **rx_skb_nextp; - struct timer_list rx_msg_timer; - struct sk_buff *rx_skb_head; - unsigned int rx_need_bytes; - struct delayed_work rx_delayed_work; - struct work_struct rx_work; + u32 stopped : 1; + u32 paused : 1; + u32 aborted : 1; + u32 interrupted : 1; + u32 unrecov_intr : 1; + + struct sk_buff **skb_nextp; + struct timer_list msg_timer; + struct sk_buff *skb_head; + unsigned int need_bytes; + struct delayed_work delayed_work; + struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; }; @@ -84,7 +86,7 @@ struct strparser { /* Must be called with lock held for attached socket */ static inline void strp_pause(struct strparser *strp) { - strp->rx_paused = 1; + strp->paused = 1; } /* May be called without holding lock for attached socket */ @@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp, #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ strp->stats._stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); #undef SAVE_PSOCK_STATS - if (strp->rx_aborted) - agg_stats->rx_aborts++; - if (strp->rx_interrupted) - agg_stats->rx_interrupted++; - if (strp->rx_unrecov_intr) - agg_stats->rx_unrecov_intr++; + if (strp->aborted) + agg_stats->aborts++; + if (strp->interrupted) + agg_stats->interrupted++; + if (strp->unrecov_intr) + agg_stats->unrecov_intr++; } static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, struct strp_aggr_stats *agg_stats) { #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_interrupted); - SAVE_PSOCK_STATS(rx_unrecov_intr); + SAVE_PSOCK_STATS(msgs); + SAVE_PSOCK_STATS(bytes); + SAVE_PSOCK_STATS(mem_fail); + SAVE_PSOCK_STATS(need_more_hdr); + SAVE_PSOCK_STATS(msg_too_big); + SAVE_PSOCK_STATS(msg_timeouts); + SAVE_PSOCK_STATS(bad_hdr_len); + SAVE_PSOCK_STATS(aborts); + SAVE_PSOCK_STATS(interrupted); + SAVE_PSOCK_STATS(unrecov_intr); #undef SAVE_PSOCK_STATS } @@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); -int strp_init(struct strparser *strp, struct sock *csk, +int strp_init(struct strparser *strp, struct sock *sk, struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); +int strp_process(struct strparser *strp, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo); #endif /* __NET_STRPARSER_H_ */ -- cgit From c613c209c3f351d47158f728271d0c73b6dd24c6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 31 Jul 2017 08:15:47 -0400 Subject: net: add skb_frag_foreach_page and use with kmap_atomic Skb frags may contain compound pages. Various operations map frags temporarily using kmap_atomic, but this function works on single pages, not whole compound pages. The distinction is only relevant for high mem pages that require temporary mappings. Introduce a looping mechanism that for compound highmem pages maps one page at a time, does not change behavior on other pages. Use the loop in the kmap_atomic callers in net/core/skbuff.c. Verified by triggering skb_copy_bits with tcpdump -n -c 100 -i ${DEV} -w /dev/null & netperf -t TCP_STREAM -H ${HOST} and by triggering __skb_checksum with ethtool -K ${DEV} tx off repeated the tests with looping on a non-highmem platform (x86_64) by making skb_frag_must_loop always return true. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18e76bf9574e..6f9f1b2715ec 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,6 +345,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) frag->size -= delta; } +static inline bool skb_frag_must_loop(struct page *p) +{ +#if defined(CONFIG_HIGHMEM) + if (PageHighMem(p)) + return true; +#endif + return false; +} + +/** + * skb_frag_foreach_page - loop over pages in a fragment + * + * @f: skb frag to operate on + * @f_off: offset from start of f->page.p + * @f_len: length from f_off to loop over + * @p: (temp var) current page + * @p_off: (temp var) offset from start of current page, + * non-zero only on first page. + * @p_len: (temp var) length in current page, + * < PAGE_SIZE only on first and last page. + * @copied: (temp var) length so far, excluding current p_len. + * + * A fragment can hold a compound page, in which case per-page + * operations, notably kmap_atomic, must be called for each + * regular page. + */ +#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ + for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ + p_off = (f_off) & (PAGE_SIZE - 1), \ + p_len = skb_frag_must_loop(p) ? \ + min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ + copied = 0; \ + copied < f_len; \ + copied += p_len, p++, p_off = 0, \ + p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ + #define HAVE_HW_TIME_STAMP /** -- cgit From 46587e4a312780a63132483bc8678c397eca6aff Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:39 -0400 Subject: net: dsa: remove PHY device argument from .set_eee The DSA switch operations for EEE are only meant to configure a port's MAC EEE settings. The port's PHY EEE settings are accessed by the DSA layer and must be made available via a proper PHY driver. In order to reduce this confusion, remove the phy_device argument from the .set_eee operation. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 88da272d20d0..ce46db323394 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -335,7 +335,6 @@ struct dsa_switch_ops { * EEE setttings */ int (*set_eee)(struct dsa_switch *ds, int port, - struct phy_device *phydev, struct ethtool_eee *e); int (*get_eee)(struct dsa_switch *ds, int port, struct ethtool_eee *e); -- cgit From 08f500610f39809c107f206cba1f799c98c38054 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 1 Aug 2017 16:32:41 -0400 Subject: net: dsa: rename switch EEE ops To avoid confusion with the PHY EEE settings, rename the .set_eee and .get_eee ops to respectively .set_mac_eee and .get_mac_eee. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ce46db323394..0b1a0622b33c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -332,12 +332,12 @@ struct dsa_switch_ops { struct phy_device *phy); /* - * EEE setttings + * Port's MAC EEE settings */ - int (*set_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); - int (*get_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + int (*set_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); + int (*get_mac_eee)(struct dsa_switch *ds, int port, + struct ethtool_eee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); -- cgit From 581c4484769e692eade761c17c22549aaefe6749 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 1 Aug 2017 15:38:01 -0700 Subject: HID: input: map digitizer battery usage We already mapped battery strength reports from the generic device control page, but we did not update capacity from input reports, nor we mapped the battery strength report from the digitizer page, so let's implement this now. Batteries driven by the input reports will now start in "unknown" state, and will get updated once we receive first report containing battery strength from the device. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5006f9b5d837..281d1ffcbe02 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -542,10 +542,12 @@ struct hid_device { /* device report descriptor */ * battery is non-NULL. */ struct power_supply *battery; + __s32 battery_capacity; __s32 battery_min; __s32 battery_max; __s32 battery_report_type; __s32 battery_report_id; + bool battery_reported; #endif unsigned int status; /* see STAT flags above */ -- cgit From ffdb5211da1c20354f1b40c204b6cf6c29c68161 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Tue, 1 Aug 2017 12:49:08 +0300 Subject: xfrm: Auto-load xfrm offload modules IPSec crypto offload depends on the protocol-specific offload module (such as esp_offload.ko). When the user installs an SA with crypto-offload, load the offload module automatically, in the same way that the protocol module is loaded (such as esp.ko) Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afb4929d7232..5a360100136c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -43,6 +43,8 @@ MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) #define MODULE_ALIAS_XFRM_TYPE(family, proto) \ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) +#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \ + MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) @@ -1558,7 +1560,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); int xfrm_state_mtu(struct xfrm_state *x, int mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay); +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); -- cgit From f70f250a77313b542531e1ff7a449cd0ccd83ec0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 1 Aug 2017 12:49:10 +0300 Subject: net: Allow IPsec GSO for local sockets This patch allows local sockets to make use of XFRM GSO code path. Signed-off-by: Steffen Klassert Signed-off-by: Ilan Tayari --- include/net/xfrm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5a360100136c..18d7de34a5c3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1858,6 +1858,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + struct xfrm_state *x = dst->xfrm; + + if (!x || !x->type_offload) + return false; + + if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) && + !dst->child->xfrm) + return true; + + return false; +} + static inline void xfrm_dev_state_delete(struct xfrm_state *x) { struct xfrm_state_offload *xso = &x->xso; @@ -1900,6 +1914,11 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x { return false; } + +static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) +{ + return false; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) -- cgit From 278982b50b28d654ff8cca0efb1ff6e835c6cc4c Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Wed, 2 Aug 2017 16:01:27 +0800 Subject: ASoC: rt5663: Seprate the DC offset between headphone and headset The patch seprates the DC offset between headphone and headset. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- include/sound/rt5663.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/rt5663.h b/include/sound/rt5663.h index be6f1b2d8fbd..7d00e5849706 100644 --- a/include/sound/rt5663.h +++ b/include/sound/rt5663.h @@ -14,6 +14,8 @@ struct rt5663_platform_data { unsigned int dc_offset_l_manual; unsigned int dc_offset_r_manual; + unsigned int dc_offset_l_manual_mic; + unsigned int dc_offset_r_manual_mic; }; #endif -- cgit From 2a04aabf5c96c9e25df488949b21223bcc623815 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Aug 2017 12:25:01 +0200 Subject: netfilter: constify nf_conntrack_l3/4proto parameters When a nf_conntrack_l3/4proto parameter is not on the left hand side of an assignment, its address is not taken, and it is not passed to a function that may modify its fields, then it can be declared as const. This change is useful from a documentation point of view, and can possibly facilitate making some nf_conntrack_l3/4proto structures const subsequently. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 +++++++------- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7032e044bbe2..b6e27cafb1d9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,23 +125,23 @@ struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto[], + struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto[], - unsigned int num_proto); + struct nf_conntrack_l4proto *const proto[], + unsigned int num_proto); /* Protocol global registration. */ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], unsigned int num_proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d40b89355fdd..b222957062b5 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, static inline unsigned int * nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; -- cgit From b2f9d432deebab5096aad5942c2f2b1ec2865f5a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 1 Aug 2017 13:18:09 -0700 Subject: flow_dissector: remove unused functions They are introduced by commit f70ea018da06 ("net: Add functions to get skb->hash based on flow structures") but never gets used in tree. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f9f1b2715ec..be76082f48aa 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1158,8 +1158,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6); - static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { @@ -1172,20 +1170,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl); - -static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - if (!skb->l4_hash && !skb->sw_hash) { - struct flow_keys keys; - __u32 hash = __get_hash_from_flowi4(fl4, &keys); - - __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); - } - - return skb->hash; -} - __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) -- cgit From 0679dee03c6d706d57145ea92c23d08fa10a1999 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 2 Aug 2017 17:55:29 +0100 Subject: cgroup: keep track of number of descent cgroups Keep track of the number of online and dying descent cgroups. This data will be used later to add an ability to control cgroup hierarchy (limit the depth and the number of descent cgroups) and display hierarchy stats. Signed-off-by: Roman Gushchin Suggested-by: Tejun Heo Signed-off-by: Tejun Heo Cc: Zefan Li Cc: Waiman Long Cc: Johannes Weiner Cc: kernel-team@fb.com Cc: cgroups@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- include/linux/cgroup-defs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9d741959f218..58b4c425a155 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -273,6 +273,14 @@ struct cgroup { */ int level; + /* + * Keep track of total numbers of visible and dying descent cgroups. + * Dying cgroups are cgroups which were deleted by a user, + * but are still existing because someone else is holding a reference. + */ + int nr_descendants; + int nr_dying_descendants; + /* * Each non-empty css_set associated with this cgroup contributes * one to nr_populated_csets. The counter is zero iff this cgroup -- cgit From 1a926e0bbab83bae8207d05a533173425e0496d1 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jul 2017 18:28:44 +0100 Subject: cgroup: implement hierarchy limits Creating cgroup hierearchies of unreasonable size can affect overall system performance. A user might want to limit the size of cgroup hierarchy. This is especially important if a user is delegating some cgroup sub-tree. To address this issue, introduce an ability to control the size of cgroup hierarchy. The cgroup.max.descendants control file allows to set the maximum allowed number of descendant cgroups. The cgroup.max.depth file controls the maximum depth of the cgroup tree. Both are single value r/w files, with "max" default value. The control files exist on each hierarchy level (including root). When a new cgroup is created, we check the total descendants and depth limits on each level, and if none of them are exceeded, a new cgroup is created. Only alive cgroups are counted, removed (dying) cgroups are ignored. Signed-off-by: Roman Gushchin Suggested-by: Tejun Heo Signed-off-by: Tejun Heo Cc: Zefan Li Cc: Waiman Long Cc: Johannes Weiner Cc: kernel-team@fb.com Cc: cgroups@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- include/linux/cgroup-defs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 58b4c425a155..59e4ad9e7bac 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -273,13 +273,18 @@ struct cgroup { */ int level; + /* Maximum allowed descent tree depth */ + int max_depth; + /* * Keep track of total numbers of visible and dying descent cgroups. * Dying cgroups are cgroups which were deleted by a user, * but are still existing because someone else is holding a reference. + * max_descendants is a maximum allowed number of descent cgroups. */ int nr_descendants; int nr_dying_descendants; + int max_descendants; /* * Each non-empty css_set associated with this cgroup contributes -- cgit From 1e140df0496541c473e5d40b0da5d2d626b2e343 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 24 Jul 2017 22:56:43 -0700 Subject: remoteproc: qcom: Add support for SSR notifications This adds the remoteproc part of subsystem restart, which is responsible for emitting notifications to other processors in the system about a dying remoteproc instance. These notifications are propagated to the various communication systems in the various remote processors to shut down communication links that was left in a dangling state as the remoteproc was stopped (or crashed). Signed-off-by: Bjorn Andersson --- include/linux/remoteproc/qcom_rproc.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/remoteproc/qcom_rproc.h (limited to 'include') diff --git a/include/linux/remoteproc/qcom_rproc.h b/include/linux/remoteproc/qcom_rproc.h new file mode 100644 index 000000000000..fa8e38681b4b --- /dev/null +++ b/include/linux/remoteproc/qcom_rproc.h @@ -0,0 +1,22 @@ +#ifndef __QCOM_RPROC_H__ +#define __QCOM_RPROC_H__ + +struct notifier_block; + +#if IS_ENABLED(CONFIG_QCOM_RPROC_COMMON) + +int qcom_register_ssr_notifier(struct notifier_block *nb); +void qcom_unregister_ssr_notifier(struct notifier_block *nb); + +#else + +static inline int qcom_register_ssr_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline void qcom_unregister_ssr_notifier(struct notifier_block *nb) {} + +#endif + +#endif -- cgit From 2202e35d47fff379fc744e6bd3c111b018cc77df Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Aug 2017 09:56:06 +0200 Subject: ipv4: fib: Remove unused functions Previous patches converted users of these functions to provide offload indication using the nexthop's flags instead of the FIB info's. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 41d580c6185f..ef8992d49bc3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -124,7 +124,6 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif - unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -177,18 +176,6 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); -static inline void fib_info_offload_inc(struct fib_info *fi) -{ - fi->fib_offload_cnt++; - fi->fib_flags |= RTNH_F_OFFLOAD; -} - -static inline void fib_info_offload_dec(struct fib_info *fi) -{ - if (--fi->fib_offload_cnt == 0) - fi->fib_flags &= ~RTNH_F_OFFLOAD; -} - #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ -- cgit From 957b8dffa4e3d191f0f1571d006d0e520790dcb9 Mon Sep 17 00:00:00 2001 From: João Paulo Rechi Vita Date: Mon, 24 Jul 2017 14:22:25 -0700 Subject: HID: multitouch: Support Asus T304UA media keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Asus T304UA convertible sports a magnetic detachable keyboard with touchpad, which is connected over USB. Most of the keyboard hotkeys are exposed through the same USB interface as the touchpad, defined in the report descriptor as follows: 0x06, 0x31, 0xFF, // Usage Page (Vendor Defined 0xFF31) 0x09, 0x76, // Usage (0x76) 0xA1, 0x01, // Collection (Application) 0x05, 0xFF, // Usage Page (Reserved 0xFF) 0x85, 0x5A, // Report ID (90) 0x19, 0x00, // Usage Minimum (0x00) 0x2A, 0xFF, 0x00, // Usage Maximum (0xFF) 0x15, 0x00, // Logical Minimum (0) 0x26, 0xFF, 0x00, // Logical Maximum (255) 0x75, 0x08, // Report Size (8) 0x95, 0x0F, // Report Count (15) 0xB1, 0x02, // Feature (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) 0x05, 0xFF, // Usage Page (Reserved 0xFF) 0x85, 0x5A, // Report ID (90) 0x19, 0x00, // Usage Minimum (0x00) 0x2A, 0xFF, 0x00, // Usage Maximum (0xFF) 0x15, 0x00, // Logical Minimum (0) 0x26, 0xFF, 0x00, // Logical Maximum (255) 0x75, 0x08, // Report Size (8) 0x95, 0x02, // Report Count (2) 0x81, 0x02, // Input (Data,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) 0xC0, // End Collection This UsagePage is declared as a variable, but we need to treat it as an array to be able to map each Usage we care about to its corresponding input key. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5006f9b5d837..a08e6b15d98d 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -173,6 +173,7 @@ struct hid_item { #define HID_UP_LOGIVENDOR3 0xff430000 #define HID_UP_LNVENDOR 0xffa00000 #define HID_UP_SENSOR 0x00200000 +#define HID_UP_ASUSVENDOR 0xff310000 #define HID_USAGE 0x0000ffff @@ -292,6 +293,7 @@ struct hid_item { #define HID_DG_BARRELSWITCH2 0x000d005a #define HID_DG_TOOLSERIALNUMBER 0x000d005b +#define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076 /* * HID report types --- Ouch! HID spec says 1 2 3! */ -- cgit From d4cb37e71662dac72049bf7c55a9038bd7d2bcb5 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 16 May 2017 00:17:42 +0200 Subject: mtd: nand: Remove support for block locking/unlocking Commit 7d70f334ad2b ("mtd: nand: add lock/unlock routines") introduced support for the Micron LOCK/UNLOCK commands but no one ever used the nand_lock/unlock() functions. Remove support for these vendor-specific operations from the core. If one ever wants to add them back they should be put in nand_micron.c and mtd->_lock/_unlock should be directly assigned from there instead of exporting the functions. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 892148c448cc..8a6522e82f03 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -44,12 +44,6 @@ void nand_release(struct mtd_info *mtd); /* Internal helper for board drivers which need to override command function */ void nand_wait_ready(struct mtd_info *mtd); -/* locks all blocks present in the device */ -int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); - -/* unlocks specified locked blocks */ -int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); - /* The maximum number of NAND chips in an array */ #define NAND_MAX_CHIPS 8 @@ -89,10 +83,6 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); #define NAND_CMD_SET_FEATURES 0xef #define NAND_CMD_RESET 0xff -#define NAND_CMD_LOCK 0x2a -#define NAND_CMD_UNLOCK1 0x23 -#define NAND_CMD_UNLOCK2 0x24 - /* Extended commands for large page devices */ #define NAND_CMD_READSTART 0x30 #define NAND_CMD_RNDOUTSTART 0xE0 -- cgit From e61e4055b165f4c645ce2a85890b313abf841f67 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:09 +0800 Subject: sctp: remove the typedef sctp_shutdownhdr_t This patch is to remove the typedef sctp_shutdownhdr_t, and replace with struct sctp_shutdownhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index ba007163acfd..7a586ba7dcd4 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -416,13 +416,13 @@ struct sctp_abort_chunk { /* For the graceful shutdown we must carry the tag (in common header) * and the highest consecutive acking value. */ -typedef struct sctp_shutdownhdr { +struct sctp_shutdownhdr { __be32 cum_tsn_ack; -} sctp_shutdownhdr_t; +}; struct sctp_shutdown_chunk_t { struct sctp_chunkhdr chunk_hdr; - sctp_shutdownhdr_t shutdown_hdr; + struct sctp_shutdownhdr shutdown_hdr; }; /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ -- cgit From ac23e68133f4570f40ec0910286ced08ced2d378 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:10 +0800 Subject: sctp: fix the name of struct sctp_shutdown_chunk_t This patch is to fix the name of struct sctp_shutdown_chunk_t , replace with struct sctp_initack_chunk in the places where it's using it. It is also to fix some indent problem. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 7a586ba7dcd4..553020cbf6f7 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -420,7 +420,7 @@ struct sctp_shutdownhdr { __be32 cum_tsn_ack; }; -struct sctp_shutdown_chunk_t { +struct sctp_shutdown_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_shutdownhdr shutdown_hdr; }; -- cgit From d8238d9dab8fbea22dd04f4e77639c7f7b83eef7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:11 +0800 Subject: sctp: remove the typedef sctp_errhdr_t This patch is to remove the typedef sctp_errhdr_t, and replace with struct sctp_errhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- include/net/sctp/sctp.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 553020cbf6f7..d35bdd30fa0f 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -427,15 +427,15 @@ struct sctp_shutdown_chunk { /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ -typedef struct sctp_errhdr { +struct sctp_errhdr { __be16 cause; __be16 length; __u8 variable[0]; -} sctp_errhdr_t; +}; typedef struct sctp_operr_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_errhdr_t err_hdr; + struct sctp_errhdr err_hdr; } sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 45fd4c6056b5..84650fed1e6a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -479,13 +479,13 @@ for (pos.v = chunk->member;\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ -for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ +for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ - ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ - ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) + ntohs(err->length) >= sizeof(struct sctp_errhdr); \ + err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) -- cgit From 87caeba7914979ef6a5765dfa3163b51a30133ab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:12 +0800 Subject: sctp: remove the typedef sctp_operr_chunk_t This patch is to remove the typedef sctp_operr_chunk_t, and replace with struct sctp_operr_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d35bdd30fa0f..07c12e98fa85 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -433,10 +433,10 @@ struct sctp_errhdr { __u8 variable[0]; }; -typedef struct sctp_operr_chunk { +struct sctp_operr_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_errhdr err_hdr; -} sctp_operr_chunk_t; +}; /* RFC 2960 3.3.10 - Operation Error * -- cgit From 2a4932167772874c5bc4b3dfebf61cfadb5554b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:13 +0800 Subject: sctp: remove the typedef sctp_error_t This patch is to remove the typedef sctp_error_t, and replace with enum sctp_error in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 07c12e98fa85..c74ea93e36eb 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -457,7 +457,7 @@ struct sctp_operr_chunk { * 9 No User Data * 10 Cookie Received While Shutting Down */ -typedef enum { +enum sctp_error { SCTP_ERROR_NO_ERROR = cpu_to_be16(0x00), SCTP_ERROR_INV_STRM = cpu_to_be16(0x01), @@ -512,7 +512,7 @@ typedef enum { * 0x0105 Unsupported HMAC Identifier */ SCTP_ERROR_UNSUP_HMAC = cpu_to_be16(0x0105) -} sctp_error_t; +}; -- cgit From 1fb6d83bd37dc7d0949b8a8005f7c24dddc3ee1e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:14 +0800 Subject: sctp: remove the typedef sctp_ecnehdr_t This patch is to remove the typedef sctp_ecnehdr_t, and replace with struct sctp_ecnehdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c74ea93e36eb..5ea739b99f8d 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -519,13 +519,13 @@ enum sctp_error { /* RFC 2960. Appendix A. Explicit Congestion Notification. * Explicit Congestion Notification Echo (ECNE) (12) */ -typedef struct sctp_ecnehdr { +struct sctp_ecnehdr { __be32 lowest_tsn; -} sctp_ecnehdr_t; +}; typedef struct sctp_ecne_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_ecnehdr_t ence_hdr; + struct sctp_ecnehdr ence_hdr; } sctp_ecne_chunk_t; /* RFC 2960. Appendix A. Explicit Congestion Notification. -- cgit From b515fd27591dea06bc9f0178fba361b43b76ab6b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:15 +0800 Subject: sctp: remove the typedef sctp_ecne_chunk_t This patch is to remove the typedef sctp_ecne_chunk_t, and replace with struct sctp_ecne_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5ea739b99f8d..026bbdfcaf44 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -523,10 +523,10 @@ struct sctp_ecnehdr { __be32 lowest_tsn; }; -typedef struct sctp_ecne_chunk { +struct sctp_ecne_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_ecnehdr ence_hdr; -} sctp_ecne_chunk_t; +}; /* RFC 2960. Appendix A. Explicit Congestion Notification. * Congestion Window Reduced (CWR) (13) -- cgit From 65f77105438a7793836d7ba2d9a05fa585b8caf9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:16 +0800 Subject: sctp: remove the typedef sctp_cwrhdr_t This patch is to remove the typedef sctp_cwrhdr_t, and replace with struct sctp_cwrhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 026bbdfcaf44..3c8c418425e6 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -531,13 +531,13 @@ struct sctp_ecne_chunk { /* RFC 2960. Appendix A. Explicit Congestion Notification. * Congestion Window Reduced (CWR) (13) */ -typedef struct sctp_cwrhdr { +struct sctp_cwrhdr { __be32 lowest_tsn; -} sctp_cwrhdr_t; +}; typedef struct sctp_cwr_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_cwrhdr_t cwr_hdr; + struct sctp_cwrhdr cwr_hdr; } sctp_cwr_chunk_t; /* PR-SCTP -- cgit From 05b25d0ba68d5b6d5d06f7c6cc3fec49ffa2df38 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:17 +0800 Subject: sctp: remove the typedef sctp_cwr_chunk_t Remove this typedef including the struct, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 3c8c418425e6..2a97639eb035 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -535,11 +535,6 @@ struct sctp_cwrhdr { __be32 lowest_tsn; }; -typedef struct sctp_cwr_chunk { - struct sctp_chunkhdr chunk_hdr; - struct sctp_cwrhdr cwr_hdr; -} sctp_cwr_chunk_t; - /* PR-SCTP * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN) * -- cgit From 8b32f2348a0441ef202562618b13d1bb494f3a47 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:18 +0800 Subject: sctp: remove the typedef sctp_addip_param_t This patch is to remove the typedef sctp_addip_param_t, and replace with struct sctp_addip_param in the places where it's using this typedef. It is to use sizeof(variable) instead of sizeof(type), and also fix some indent problems. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 2a97639eb035..5b7d6d9d3fc5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -629,10 +629,10 @@ struct sctp_fwdtsn_chunk { * The ASCONF Parameter Response is used in the ASCONF-ACK to * report status of ASCONF processing. */ -typedef struct sctp_addip_param { - struct sctp_paramhdr param_hdr; - __be32 crr_id; -} sctp_addip_param_t; +struct sctp_addip_param { + struct sctp_paramhdr param_hdr; + __be32 crr_id; +}; typedef struct sctp_addiphdr { __be32 serial; -- cgit From 65205cc465e9b37abbdbb3d595c46081b97e35bc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:19 +0800 Subject: sctp: remove the typedef sctp_addiphdr_t This patch is to remove the typedef sctp_addiphdr_t, and replace with struct sctp_addiphdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 5b7d6d9d3fc5..83dac9b0b4bd 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -634,14 +634,14 @@ struct sctp_addip_param { __be32 crr_id; }; -typedef struct sctp_addiphdr { +struct sctp_addiphdr { __be32 serial; __u8 params[0]; -} sctp_addiphdr_t; +}; typedef struct sctp_addip_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_addiphdr_t addip_hdr; + struct sctp_addiphdr addip_hdr; } sctp_addip_chunk_t; /* AUTH -- cgit From 68d75469468620c37cd58dc352f1dcec8c3896b6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:20 +0800 Subject: sctp: remove the typedef sctp_addip_chunk_t This patch is to remove the typedef sctp_addip_chunk_t, and replace with struct sctp_addip_chunk in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 83dac9b0b4bd..e7b439ed6371 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -639,10 +639,10 @@ struct sctp_addiphdr { __u8 params[0]; }; -typedef struct sctp_addip_chunk { +struct sctp_addip_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_addiphdr addip_hdr; -} sctp_addip_chunk_t; +}; /* AUTH * Section 4.1 Authentication Chunk (AUTH) -- cgit From 96f7ef4d58100d7168e0c4d01a59fbd8589f5756 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:21 +0800 Subject: sctp: remove the typedef sctp_authhdr_t This patch is to remove the typedef sctp_authhdr_t, and replace with struct sctp_authhdr in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index e7b439ed6371..b7603f5efebe 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -693,15 +693,15 @@ struct sctp_addip_chunk { * HMAC: n bytes (unsigned integer) This hold the result of the HMAC * calculation. */ -typedef struct sctp_authhdr { +struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; __u8 hmac[0]; -} sctp_authhdr_t; +}; typedef struct sctp_auth_chunk { struct sctp_chunkhdr chunk_hdr; - sctp_authhdr_t auth_hdr; + struct sctp_authhdr auth_hdr; } sctp_auth_chunk_t; struct sctp_infox { -- cgit From bb96dec74543bb3ceb4ac5caf39341dadb4cb559 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 15:42:22 +0800 Subject: sctp: remove the typedef sctp_auth_chunk_t This patch is to remove the typedef sctp_auth_chunk_t, and replace with struct sctp_auth_chunk in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b7603f5efebe..82b171e1aa0b 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -699,10 +699,10 @@ struct sctp_authhdr { __u8 hmac[0]; }; -typedef struct sctp_auth_chunk { +struct sctp_auth_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_authhdr auth_hdr; -} sctp_auth_chunk_t; +}; struct sctp_infox { struct sctp_info *sctpinfo; -- cgit From f89823c212246d0671cc51e69894a3df1a743aee Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Aug 2017 18:05:50 +0100 Subject: drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface The motivation behind this new interface is expose at runtime the creation of new OA configs which can be used as part of the i915 perf open interface. This will enable the kernel to learn new configs which may be experimental, or otherwise not part of the core set currently available through the i915 perf interface. v2: Drop DRM_ERROR for userspace errors (Matthew) Add padding to userspace structure (Matthew) s/guid/uuid/ (Matthew) v3: Use u32 instead of int to iterate through registers (Matthew) v4: Lock access to dynamic config list (Lionel) v5: by Matthew: Fix uninitialized error values Fix incorrect unwiding when opening perf stream Use kmalloc_array() to store register Use uuid_is_valid() to valid config uuids Declare ioctls as write only Check padding members are set to 0 by Lionel: Return ENOENT rather than EINVAL when trying to remove non existing config v6: by Chris: Use ref counts for OA configs Store UUID in drm_i915_perf_oa_config rather then using pointer Shuffle fields of drm_i915_perf_oa_config to avoid padding v7: by Chris Rename uapi pointers fields to end with '_ptr' v8: by Andrzej, Marek, Sebastian Update register whitelisting by Lionel Add more register names for documentation Allow configuration programming in non-paranoid mode Add support for value filter for a couple of registers already programmed in other part of the kernel v9: Documentation fix (Lionel) Allow writing WAIT_FOR_RC6_EXIT only on Gen8+ (Andrzej) v10: Perform read access_ok() on register pointers (Lionel) Signed-off-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Andrzej Datczuk Reviewed-by: Andrzej Datczuk Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-2-lionel.g.landwerlin@intel.com --- include/uapi/drm/i915_drm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ccbd6a2bbe0..ce3833fa1e06 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1467,6 +1471,22 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; +/** + * Structure to upload perf dynamic configuration into the kernel. + */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + __u64 __user mux_regs_ptr; + __u64 __user boolean_regs_ptr; + __u64 __user flex_regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit From 630b3aff8a51c90ef15b59c9560ac35e40e7ec09 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 1 Aug 2017 14:10:41 +0200 Subject: treewide: Consolidate Apple DMI checks We're about to amend ACPI bus scan with DMI checks whether we're running on a Mac to support Apple device properties in AML. The DMI checks are performed for every single device, adding overhead for everything x86 that isn't Apple, which is the majority. Rafael and Andy therefore request to perform the DMI match only once and cache the result. Outside of ACPI various other Apple DMI checks exist and it seems reasonable to use the cached value there as well. Rafael, Andy and Darren suggest performing the DMI check in arch code and making it available with a header in include/linux/platform_data/x86/. To this end, add early_platform_quirks() to arch/x86/kernel/quirks.c to perform the DMI check and invoke it from setup_arch(). Switch over all existing Apple DMI checks, thereby fixing two deficiencies: * They are now #defined to false on non-x86 arches and can thus be optimized away if they're located in cross-arch code. * Some of them only match "Apple Inc." but not "Apple Computer, Inc.", which is used by BIOSes released between January 2006 (when the first x86 Macs started shipping) and January 2007 (when the company name changed upon introduction of the iPhone). Suggested-by: Andy Shevchenko Suggested-by: Rafael J. Wysocki Suggested-by: Darren Hart Signed-off-by: Lukas Wunner Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/platform_data/x86/apple.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/linux/platform_data/x86/apple.h (limited to 'include') diff --git a/include/linux/platform_data/x86/apple.h b/include/linux/platform_data/x86/apple.h new file mode 100644 index 000000000000..079e816c3c21 --- /dev/null +++ b/include/linux/platform_data/x86/apple.h @@ -0,0 +1,13 @@ +#ifndef PLATFORM_DATA_X86_APPLE_H +#define PLATFORM_DATA_X86_APPLE_H + +#ifdef CONFIG_X86 +/** + * x86_apple_machine - whether the machine is an x86 Apple Macintosh + */ +extern bool x86_apple_machine; +#else +#define x86_apple_machine false +#endif + +#endif -- cgit From 004cb784018eaa2a078912180bd9999707c3c483 Mon Sep 17 00:00:00 2001 From: Matthew Minter Date: Mon, 31 Jul 2017 17:37:57 +0100 Subject: PCI: Remove unused pci_fixup_irqs() function Now we have removed all callers of pci_fixup_irqs() and migrated everything to pci_assign_irq(), delete the pci_fixup_irqs() function completely. Signed-off-by: Matthew Minter [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..69034ab8a68e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1160,8 +1160,6 @@ void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); -void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), - int (*)(const struct pci_dev *, u8, u8)); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); #define HAVE_PCI_REQ_REGIONS 2 -- cgit From 4eebedd8f1a6609739c2e9a9b020791b23cbcceb Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 3 Aug 2017 14:26:19 +0800 Subject: ACPICA: Divergences: reduce access size definitions ACPICA commit cf27b3c98883d2a15d932016792fcb8272ace96d The following commit introduces definition of access width to ACPICA. Commit: 2bece49394872d36bbc5767fd643deac05920c55 Subject: ACPI: SPCR: Use access width to determine mmio usage Actually the access bit width can be calculated via access width. It would be better to define a macro calculating bit width rather than defining fixed values. This patch thus cleans up the definitions to reduce divergences. Link: https://github.com/acpica/acpica/commit/cf27b3c9 Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acrestyp.h | 7 ------- include/acpi/actypes.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h index 4f7f39a02820..343dbdcef20c 100644 --- a/include/acpi/acrestyp.h +++ b/include/acpi/acrestyp.h @@ -377,13 +377,6 @@ struct acpi_resource_generic_register { u64 address; }; -/* Generic Address Space Access Sizes */ -#define ACPI_ACCESS_SIZE_UNDEFINED 0 -#define ACPI_ACCESS_SIZE_BYTE 1 -#define ACPI_ACCESS_SIZE_WORD 2 -#define ACPI_ACCESS_SIZE_DWORD 3 -#define ACPI_ACCESS_SIZE_QWORD 4 - struct acpi_resource_gpio { u8 revision_id; u8 connection_type; diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 2fcbaec8b368..0260be595d40 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -554,6 +554,13 @@ typedef u64 acpi_integer; #define ACPI_VALIDATE_RSDP_SIG(a) (!strncmp (ACPI_CAST_PTR (char, (a)), ACPI_SIG_RSDP, 8)) #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) +/* + * Algorithm to obtain access bit width. + * Can be used with access_width of struct acpi_generic_address and access_size of + * struct acpi_resource_generic_register. + */ +#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) + /******************************************************************************* * * Miscellaneous constants -- cgit From 3bd38469df0a0f4b3c73e490538c8de104914a4a Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 3 Aug 2017 14:26:25 +0800 Subject: ACPICA: iASL: Add support for the SDEI table ACPICA commit 5ad4f0b7bf9e7ba175bd320cf7950f3b38799ff3 ACPI 6.2 adds support for the Software Delegated Exception Interface, which is described by "Software Delegated Exception Interface (SDEI)" ARM DEN0054A. Add the necessary types in the ACPICA header files and support for compiling/decompiling the table. Link: https://github.com/acpica/acpica/commit/5ad4f0b7 Signed-off-by: James Morse Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl2.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index d568948d3a82..686b6f8c09dc 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -76,6 +76,7 @@ #define ACPI_SIG_MCHI "MCHI" /* Management Controller Host Interface table */ #define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */ #define ACPI_SIG_MTMR "MTMR" /* MID Timer table */ +#define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ @@ -1132,6 +1133,19 @@ struct acpi_mtmr_entry { u32 irq; }; +/******************************************************************************* + * + * SDEI - Software Delegated Exception Interface Descriptor Table + * + * Conforms to "Software Delegated Exception Interface (SDEI)" ARM DEN0054A, + * May 8th, 2017. Copyright 2017 ARM Ltd. + * + ******************************************************************************/ + +struct acpi_table_sdei { + struct acpi_table_header header; /* Common ACPI table header */ +}; + /******************************************************************************* * * SLIC - Software Licensing Description Table -- cgit From d5f23fe192b5b549a85e98ee9f0a3b8a0f9c04c3 Mon Sep 17 00:00:00 2001 From: Shao Ming Date: Thu, 3 Aug 2017 14:26:44 +0800 Subject: ACPICA: EFI/EDK2: Sort acpi.h inclusion order ACPICA commit 43ff22215f0fcd8ca80abec5712a07d2cc9a801d acpi.h inclusion order need to be changed to build EDK-II ports of acpidump on Windows as va_list is used before it's definition in that environment. As we only need to ensure order of acenv.h/acenvex.h to be pre/post ACPICA type definitions, inclusion order is changed to make MSVC builds happy. Lv Zheng. Link: https://github.com/acpica/acpica/commit/43ff2221 Signed-off-by: "Shao, Ming" Signed-off-by: Lv Zheng Signed-off-by: Shao Ming Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h index ca036620703c..0887d7cbb7e1 100644 --- a/include/acpi/acpi.h +++ b/include/acpi/acpi.h @@ -58,10 +58,10 @@ #include /* ACPICA data types and structures */ #include /* ACPICA exceptions */ #include /* ACPI table definitions */ -#include /* Error output and Debug macros */ #include /* Resource Descriptor structs */ +#include /* Extra environment-specific items */ +#include /* Error output and Debug macros */ #include /* OSL interfaces (ACPICA-to-OS) */ #include /* ACPI core subsystem external interfaces */ -#include /* Extra environment-specific items */ #endif /* __ACPI_H__ */ -- cgit From 65082bfcb486ef06f93b7603a9045cdf0c0022e8 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 3 Aug 2017 14:26:50 +0800 Subject: ACPICA: CLib: Add short multiply/shift support ACPICA commit 01b8f5a2350b9cc329cd8402ac8faec36fc501f5 In order to build ACPICA EFI tools with EDK-II on Windows, 64-bit multiply/shift supports are also required to be implemented. Otherwise, MSVC complains: acpidump.lib(utstrtoul64.obj) : error LNK2001: unresolved external symbol __allmul acpidump.lib(uthex.obj) : error LNK2001: unresolved external symbol __aullshr Note: 1. This patch also splits _EDK2_EFI from _GNU_EFI as they might have different math64 supports. 2. Support of gcc math64 is not included in this patch. 3. Support of EDK2 arch independent math64 is done via linking to base_lib. This patch fixes this issue. Reported by Shao Ming, fixed by Lv Zheng. For Linux kernel, this patch is a functional no-op. Link: https://github.com/acpica/acpica/commit/01b8f5a2 Tested-by: "Shao, Ming" Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 1 + include/acpi/platform/acgcc.h | 4 ++++ include/acpi/platform/aclinux.h | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 0260be595d40..dc3bb153420a 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -166,6 +166,7 @@ typedef u64 acpi_physical_address; #define ACPI_SIZE_MAX ACPI_UINT64_MAX #define ACPI_USE_NATIVE_DIVIDE /* Has native 64-bit integer support */ +#define ACPI_USE_NATIVE_MATH64 /* Has native 64-bit integer support */ /* * In the case of the Itanium Processor Family (IPF), the hardware does not diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 97a7e21cfbe0..9c8f8b79644e 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -84,4 +84,8 @@ typedef __builtin_va_list va_list; #define COMPILER_VA_MACRO 1 +/* GCC supports native multiply/shift on 32-bit platforms */ + +#define ACPI_USE_NATIVE_MATH64 + #endif /* __ACGCC_H__ */ diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index afd95f28c8f0..1b473efd9eb6 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -128,6 +128,7 @@ /* Host-dependent types and defines for in-kernel ACPICA */ #define ACPI_MACHINE_WIDTH BITS_PER_LONG +#define ACPI_USE_NATIVE_MATH64 #define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol); #define strtoul simple_strtoul @@ -216,6 +217,7 @@ #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long #define ACPI_USE_NATIVE_DIVIDE +#define ACPI_USE_NATIVE_MATH64 #endif #ifndef __cdecl -- cgit From 137c78352ea98db70d0f2aa648f723bb444faee8 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 3 Aug 2017 14:27:35 +0800 Subject: ACPICA: Resources: Allow _DMA method in walk resources ACPICA commit af661c00afac7aa481a961fa48c6540a99ad64a6 The _DMA object contains a resource template, this change adds support for the walk resources function so that ACPI devices containing a _DMA object can actually parse it to detect DMA ranges for the respective bus. Link: https://github.com/acpica/acpica/commit/af661c00 Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acnames.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h index b421584033a5..d8dd3bf51ca7 100644 --- a/include/acpi/acnames.h +++ b/include/acpi/acnames.h @@ -54,6 +54,7 @@ #define METHOD_NAME__CLS "_CLS" #define METHOD_NAME__CRS "_CRS" #define METHOD_NAME__DDN "_DDN" +#define METHOD_NAME__DMA "_DMA" #define METHOD_NAME__HID "_HID" #define METHOD_NAME__INI "_INI" #define METHOD_NAME__PLD "_PLD" -- cgit From 4a9673dd50c2362f5bd2d03c89099267b49d400a Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 3 Aug 2017 14:27:48 +0800 Subject: ACPICA: Update version to 20170728 ACPICA commit 50f61636543a55d143c792a4814da7700a7fafc2 Version 20170728. Link: https://github.com/acpica/acpica/commit/50f61636 Signed-off-by: Bob Moore Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 06b87adc97ef..53c5e2f7bcec 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20170629 +#define ACPI_CA_VERSION 0x20170728 #include #include -- cgit From 04b1d4e50e82536c12da00ee04a77510c459c844 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:11 +0200 Subject: net: core: Make the FIB notification chain generic The FIB notification chain is currently soley used by IPv4 code. However, we're going to introduce IPv6 FIB offload support, which requires these notification as well. As explained in commit c3852ef7f2f8 ("ipv4: fib: Replay events when registering FIB notifier"), upon registration to the chain, the callee receives a full dump of the FIB tables and rules by traversing all the net namespaces. The integrity of the dump is ensured by a per-namespace sequence counter that is incremented whenever a change to the tables or rules occurs. In order to allow more address families to use the chain, each family is expected to register its fib_notifier_ops in its pernet init. These operations allow the common code to read the family's sequence counter as well as dump its tables and rules in the given net namespace. Additionally, a 'family' parameter is added to sent notifications, so that listeners could distinguish between the different families. Implement the common code that allows listeners to register to the chain and for address families to register their fib_notifier_ops. Subsequent patches will implement these operations in IPv6. In the future, ipmr and ip6mr will be extended to provide these notifications as well. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/net/ip_fib.h | 30 ++++++++---------------------- include/net/net_namespace.h | 1 + include/net/netns/ipv4.h | 1 + 4 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 include/net/fib_notifier.h (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h new file mode 100644 index 000000000000..241475224f74 --- /dev/null +++ b/include/net/fib_notifier.h @@ -0,0 +1,44 @@ +#ifndef __NET_FIB_NOTIFIER_H +#define __NET_FIB_NOTIFIER_H + +#include +#include +#include + +struct fib_notifier_info { + struct net *net; + int family; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_REPLACE, + FIB_EVENT_ENTRY_APPEND, + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, + FIB_EVENT_NH_ADD, + FIB_EVENT_NH_DEL, +}; + +struct fib_notifier_ops { + int family; + struct list_head list; + unsigned int (*fib_seq_read)(struct net *net); + int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct rcu_head rcu; +}; + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); +int unregister_fib_notifier(struct notifier_block *nb); +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net); +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops); + +#endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ef8992d49bc3..c0295c3ec5f3 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -188,10 +189,6 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) -struct fib_notifier_info { - struct net *net; -}; - struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ u32 dst; @@ -212,25 +209,14 @@ struct fib_nh_notifier_info { struct fib_nh *fib_nh; }; -enum fib_event_type { - FIB_EVENT_ENTRY_REPLACE, - FIB_EVENT_ENTRY_APPEND, - FIB_EVENT_ENTRY_ADD, - FIB_EVENT_ENTRY_DEL, - FIB_EVENT_RULE_ADD, - FIB_EVENT_RULE_DEL, - FIB_EVENT_NH_ADD, - FIB_EVENT_NH_DEL, -}; - -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)); -int unregister_fib_notifier(struct notifier_block *nb); -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info); -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, struct fib_notifier_info *info); +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib4_notifier_init(struct net *net); +void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1c401bd4c2e0..57faa375eab9 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -88,6 +88,7 @@ struct net { /* core fib_rules */ struct list_head rules_ops; + struct list_head fib_notifier_ops; /* protected by net_mutex */ struct net_device *loopback_dev; /* The loopback */ struct netns_core core; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9a14a0850b0e..20d061c805e3 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -159,6 +159,7 @@ struct netns_ipv4 { int sysctl_fib_multipath_hash_policy; #endif + struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; -- cgit From 1b2a4440858857f2f93bb2ec5bb3a60f4fcc25be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:14 +0200 Subject: net: fib_rules: Implement notification logic in core Unlike the routing tables, the FIB rules share a common core, so instead of replicating the same logic for each address family we can simply dump the rules and send notifications from the core itself. To protect the integrity of the dump, a rules-specific sequence counter is added for each address family and incremented whenever a rule is added or deleted (under RTNL). Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 +++++++++ include/net/ip_fib.h | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c487bfa2f479..3d7f1cefc6f5 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,7 @@ #include #include #include +#include struct fib_kuid_range { kuid_t start; @@ -57,6 +58,7 @@ struct fib_rules_ops { int addr_size; int unresolved_rules; int nr_goto_rules; + unsigned int fib_rules_seq; int (*action)(struct fib_rule *, struct flowi *, int, @@ -89,6 +91,11 @@ struct fib_rules_ops { struct rcu_head rcu; }; +struct fib_rule_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct fib_rule *rule; +}; + #define FRA_GENERIC_POLICY \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ @@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); bool fib_rule_matchall(const struct fib_rule *rule); +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family); +unsigned int fib_rules_seq_read(struct net *net, int family); int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c0295c3ec5f3..1a7f7e424320 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -199,11 +199,6 @@ struct fib_entry_notifier_info { u32 tb_id; }; -struct fib_rule_notifier_info { - struct fib_notifier_info info; /* must be first */ - struct fib_rule *rule; -}; - struct fib_nh_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib_nh *fib_nh; @@ -219,13 +214,6 @@ int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); void fib_notify(struct net *net, struct notifier_block *nb); -#ifdef CONFIG_IP_MULTIPLE_TABLES -void fib_rules_notify(struct net *net, struct notifier_block *nb); -#else -static inline void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ -} -#endif struct fib_table { struct hlist_node tb_hlist; @@ -298,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule) return true; } +static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static inline unsigned int fib4_rules_seq_read(struct net *net) +{ + return 0; +} + #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -343,6 +341,8 @@ out: } bool fib4_rule_default(const struct fib_rule *rule); +int fib4_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib4_rules_seq_read(struct net *net); #endif /* CONFIG_IP_MULTIPLE_TABLES */ -- cgit From e3ea973159d53559c5ae9a9dbc824da9aba6cac0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:15 +0200 Subject: ipv6: fib_rules: Check if rule is a default rule As explained in commit 3c71006d15fd ("ipv4: fib_rules: Check if rule is a default rule"), drivers supporting IPv6 FIB offload need to be able to sanitize the rules they don't support and potentially flush their tables. Add an IPv6 helper to check if a FIB rule is a default rule. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..6000b0dc51ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -295,6 +295,7 @@ int ipv6_route_open(struct inode *inode, struct file *file); #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); +bool fib6_rule_default(const struct fib_rule *rule); #else static inline int fib6_rules_init(void) { @@ -304,5 +305,9 @@ static inline void fib6_rules_cleanup(void) { return ; } +static inline bool fib6_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif #endif -- cgit From 16ab6d7d4d8cc037bb4be12c2b849ac92787e1ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:16 +0200 Subject: ipv6: fib: Add FIB notifiers callbacks We're about to add IPv6 FIB offload support, so implement the necessary callbacks in IPv6 code, which will later allow us to add routes and rules notifications. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 11 +++++++++++ include/net/netns/ipv6.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6000b0dc51ee..be8ddf3253dc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -292,6 +294,15 @@ int fib6_init(void); int ipv6_route_open(struct inode *inode, struct file *file); +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info); +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + +int __net_init fib6_notifier_init(struct net *net); +void __net_exit fib6_notifier_exit(struct net *net); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index de7745e2edcc..abdf3b40303b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -86,6 +86,7 @@ struct netns_ipv6 { atomic_t dev_addr_genid; atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; + struct fib_notifier_ops *notifier_ops; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit From df77fe4d9865c6354372876632bcbceeda84f6c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:17 +0200 Subject: ipv6: fib: Add in-kernel notifications for route add / delete As with IPv4, allow listeners of the FIB notification chain to receive notifications whenever a route is added, replaced or deleted. This is done by placing calls to the FIB notification chain in the two lowest level functions that end up performing these operations - namely, fib6_add_rt2node() and fib6_del_route(). Unlike IPv4, APPEND notifications aren't sent as the kernel doesn't distinguish between "append" (NLM_F_CREATE|NLM_F_APPEND) and "prepend" (NLM_F_CREATE). If NLM_F_EXCL isn't set, duplicate routes are always added after the existing duplicate routes. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index be8ddf3253dc..e2b292b79e99 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -258,6 +258,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, struct flowi6 *, int); +struct fib6_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + struct rt6_info *rt; +}; + /* * exported functions */ -- cgit From dcb18f762f6ac83a6dc9cdc26dd694dcc167beb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:18 +0200 Subject: ipv6: fib_rules: Dump rules during registration to FIB chain Allow users of the FIB notification chain to receive a complete view of the IPv6 FIB rules upon registration to the chain. The integrity of the dump is ensured by a per-family sequence counter that is incremented (under RTNL) whenever a rule is added or deleted. All the sequence counters are read (under RTNL) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e2b292b79e99..dbe5537809f5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -312,6 +312,8 @@ void __net_exit fib6_notifier_exit(struct net *net); int fib6_rules_init(void); void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); +int fib6_rules_dump(struct net *net, struct notifier_block *nb); +unsigned int fib6_rules_seq_read(struct net *net); #else static inline int fib6_rules_init(void) { @@ -325,5 +327,13 @@ static inline bool fib6_rule_default(const struct fib_rule *rule) { return true; } +static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} +static inline unsigned int fib6_rules_seq_read(struct net *net) +{ + return 0; +} #endif #endif -- cgit From e1ee0a5ba35d999caef94d659b4cb842e63aeb68 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:19 +0200 Subject: ipv6: fib: Dump tables during registration to FIB chain Dump all the FIB tables in each net namespace upon registration to the FIB notification chain so that the callee will have a complete view of the tables. The integrity of the dump is ensured by a per-table sequence counter that is incremented (under write lock) whenever a route is added or deleted from the table. All the sequence counters are read (under each table's read lock) and summed, prior and after the dump. In case the counters differ, then the dump is either restarted or the registration fails. While it's possible for a table to be modified after its counter has been read, this isn't really a problem. In case it happened before it was read the second time, then the comparison at the end will fail. If it happened afterwards, then we're guaranteed to be notified about the change, as the notification block is registered prior to the second read. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index dbe5537809f5..0b3052157e6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -235,6 +235,7 @@ struct fib6_table { struct fib6_node tb6_root; struct inet_peer_base tb6_peers; unsigned int flags; + unsigned int fib_seq; #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -308,6 +309,9 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib6_notifier_init(struct net *net); void __net_exit fib6_notifier_exit(struct net *net); +unsigned int fib6_tables_seq_read(struct net *net); +int fib6_tables_dump(struct net *net, struct notifier_block *nb); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); void fib6_rules_cleanup(void); -- cgit From 61e4d01e16acddadb9723143637a20417fa67ac9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:20 +0200 Subject: ipv6: fib: Add offload indication to routes Allow user space applications to see which routes are offloaded and which aren't by setting the RTNH_F_OFFLOAD flag when dumping them. To be consistent with IPv4, offload indication is provided on a per-nexthop basis. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/ipv6_route.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index d496c02e14bc..33e2a5732bd1 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,6 +35,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 +#define RTF_OFFLOAD 0x20000000 /* offloaded route */ #define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 -- cgit From a460aa83963b185a32a6377eb486b6e613ac8e38 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 3 Aug 2017 13:28:25 +0200 Subject: ipv6: fib: Add helpers to hold / drop a reference on rt6_info Similar to commit 1c677b3d2828 ("ipv4: fib: Add fib_info_hold() helper") and commit b423cb10807b ("ipv4: fib: Export free_fib_info()") add an helper to hold a reference on rt6_info and export rt6_release() to drop it and potentially release the route. This is needed so that drivers capable of FIB offload could hold a reference on the route before queueing it for offload and drop it after the route has been programmed to the device's tables. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0b3052157e6b..1d790ea40ea7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -187,6 +187,22 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); + +static inline void rt6_hold(struct rt6_info *rt) +{ + atomic_inc(&rt->rt6i_ref); +} + +static inline void rt6_release(struct rt6_info *rt) +{ + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); + dst_dev_put(&rt->dst); + dst_release(&rt->dst); + } +} + enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, -- cgit From a7c391f04fe3259fb0417d71fec78ae28f25780e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Jul 2017 11:28:03 +0100 Subject: crypto: algapi - use separate dst and src operands for __crypto_xor() In preparation of introducing crypto_xor_cpy(), which will use separate operands for input and output, modify the __crypto_xor() implementation, which it will share with the existing crypto_xor(), which provides the actual functionality when not using the inline version. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 436c4c2683c7..fd547f946bf8 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -192,7 +192,7 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue) } void crypto_inc(u8 *a, unsigned int size); -void __crypto_xor(u8 *dst, const u8 *src, unsigned int size); +void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int size); static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) { @@ -207,7 +207,7 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) size -= sizeof(unsigned long); } } else { - __crypto_xor(dst, src, size); + __crypto_xor(dst, dst, src, size); } } -- cgit From 45fe93dff2fb58b22de04c729f8447ba0f773d93 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 24 Jul 2017 11:28:04 +0100 Subject: crypto: algapi - make crypto_xor() take separate dst and src arguments There are quite a number of occurrences in the kernel of the pattern if (dst != src) memcpy(dst, src, walk.total % AES_BLOCK_SIZE); crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE); or crypto_xor(keystream, src, nbytes); memcpy(dst, keystream, nbytes); where crypto_xor() is preceded or followed by a memcpy() invocation that is only there because crypto_xor() uses its output parameter as one of the inputs. To avoid having to add new instances of this pattern in the arm64 code, which will be refactored to implement non-SIMD fallbacks, add an alternative implementation called crypto_xor_cpy(), taking separate input and output arguments. This removes the need for the separate memcpy(). Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index fd547f946bf8..e3cebf640c00 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -211,6 +211,25 @@ static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size) } } +static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2, + unsigned int size) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && + __builtin_constant_p(size) && + (size % sizeof(unsigned long)) == 0) { + unsigned long *d = (unsigned long *)dst; + unsigned long *s1 = (unsigned long *)src1; + unsigned long *s2 = (unsigned long *)src2; + + while (size > 0) { + *d++ = *s1++ ^ *s2++; + size -= sizeof(unsigned long); + } + } else { + __crypto_xor(dst, src1, src2, size); + } +} + int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err); int blkcipher_walk_virt(struct blkcipher_desc *desc, -- cgit From e652399edba99a5497f0d80f240c9075d3b43493 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 25 Jul 2017 14:12:11 -0500 Subject: crypto: ccp - Fix XTS-AES-128 support on v5 CCPs Version 5 CCPs have some new requirements for XTS-AES: the type field must be specified, and the key requires 512 bits, with each part occupying 256 bits and padded with zeroes. cc: # 4.9.x+ Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 3fd9a6dac344..7e9c991c95e0 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -1,7 +1,7 @@ /* * AMD Cryptographic Coprocessor (CCP) driver * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * Author: Gary R Hook @@ -229,6 +229,7 @@ enum ccp_xts_aes_unit_size { * AES operation the new IV overwrites the old IV. */ struct ccp_xts_aes_engine { + enum ccp_aes_type type; enum ccp_aes_action action; enum ccp_xts_aes_unit_size unit_size; -- cgit From 704956ecf5bcdc14d14650f39f2b545b34c96265 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 31 Jul 2017 20:19:09 +0800 Subject: f2fs: support inode checksum This patch adds to support inode checksum in f2fs. Signed-off-by: Chao Yu [Jaegeuk Kim: fix verification flow] Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 50f176683676..2a0c453d7235 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -240,6 +240,7 @@ struct f2fs_inode { __le16 i_extra_isize; /* extra inode attribute size */ __le16 i_padding; /* padding */ __le32 i_projid; /* project id */ + __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ }; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ -- cgit From 98ba0bd5505dcbb90322a4be07bcfe6b8a18c73f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:37 -0400 Subject: sock: allocate skbs from optmem Add sock_omalloc and sock_ofree to be able to allocate control skbs, for instance for looping errors onto sk_error_queue. The transmit budget (sk_wmem_alloc) is involved in transmit skb shaping, most notably in TCP Small Queues. Using this budget for control packets would impact transmission. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 393c38e9f6aa..0f778d3c4300 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1531,6 +1531,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); void __sock_wfree(struct sk_buff *skb); void sock_wfree(struct sk_buff *skb); +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); void sock_efree(struct sk_buff *skb); -- cgit From 3ece782693c4b64d588dd217868558ab9a19bfe7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:38 -0400 Subject: sock: skb_copy_ubufs support for compound pages Refine skb_copy_ubufs to support compound pages. With upcoming TCP zerocopy sendmsg, such fragments may appear. The existing code replaces each page one for one. Splitting each compound page into an independent number of regular pages can result in exceeding limit MAX_SKB_FRAGS if data is not exactly page aligned. Instead, fill all destination pages but the last to PAGE_SIZE. Split the existing alloc + copy loop into separate stages: 1. compute bytelength and minimum number of pages to store this. 2. allocate 3. copy, filling each page except the last to PAGE_SIZE bytes 4. update skb frag array Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index be76082f48aa..2f64e2bbb592 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1796,13 +1796,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb) return skb->len - skb->data_len; } -static inline unsigned int skb_pagelen(const struct sk_buff *skb) +static inline unsigned int __skb_pagelen(const struct sk_buff *skb) { unsigned int i, len = 0; for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--) len += skb_frag_size(&skb_shinfo(skb)->frags[i]); - return len + skb_headlen(skb); + return len; +} + +static inline unsigned int skb_pagelen(const struct sk_buff *skb) +{ + return skb_headlen(skb) + __skb_pagelen(skb); } /** -- cgit From 52267790ef52d7513879238ca9fac22c1733e0e3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:39 -0400 Subject: sock: add MSG_ZEROCOPY The kernel supports zerocopy sendmsg in virtio and tap. Expand the infrastructure to support other socket types. Introduce a completion notification channel over the socket error queue. Notifications are returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid blocking the send/recv path on receiving notifications. Add reference counting, to support the skb split, merge, resize and clone operations possible with SOCK_STREAM and other socket types. The patch does not yet modify any datapaths. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 60 +++++++++++++++++++++++++++++++++++++++++++ include/linux/socket.h | 1 + include/net/sock.h | 2 ++ include/uapi/linux/errqueue.h | 3 +++ 4 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2f64e2bbb592..59cff7aa494e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -429,6 +429,7 @@ enum { SKBTX_SCHED_TSTAMP = 1 << 6, }; +#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) @@ -445,8 +446,28 @@ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); void *ctx; unsigned long desc; + u16 zerocopy:1; + atomic_t refcnt; }; +#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); + +static inline void sock_zerocopy_get(struct ubuf_info *uarg) +{ + atomic_inc(&uarg->refcnt); +} + +void sock_zerocopy_put(struct ubuf_info *uarg); +void sock_zerocopy_put_abort(struct ubuf_info *uarg); + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg); + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } +static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) +{ + bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY; + + return is_zcopy ? skb_uarg(skb) : NULL; +} + +static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg) +{ + if (skb && uarg && !skb_zcopy(skb)) { + sock_zerocopy_get(uarg); + skb_shinfo(skb)->destructor_arg = uarg; + skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG; + } +} + +/* Release a reference on a zerocopy structure */ +static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + +/* Abort a zerocopy operation and revert zckey on error in send syscall */ +static inline void skb_zcopy_abort(struct sk_buff *skb) +{ + struct ubuf_info *uarg = skb_zcopy(skb); + + if (uarg) { + sock_zerocopy_put_abort(uarg); + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; + } +} + /** * skb_queue_empty - check if a queue is empty * @list: queue head diff --git a/include/linux/socket.h b/include/linux/socket.h index 8b13db5163cc..8ad963cdc88c 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -287,6 +287,7 @@ struct ucred { #define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */ #define MSG_EOF MSG_FIN +#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/include/net/sock.h b/include/net/sock.h index 0f778d3c4300..fe1a0bc25cd3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -294,6 +294,7 @@ struct sock_common { * @sk_stamp: time stamp of last packet received * @sk_tsflags: SO_TIMESTAMPING socket options * @sk_tskey: counter to disambiguate concurrent tstamp requests + * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data * @sk_frag: cached page frag @@ -462,6 +463,7 @@ struct sock { u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; + atomic_t sk_zckey; struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 07bdce1f444a..78fdf52d6b2f 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -18,10 +18,13 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP 2 #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 +#define SO_EE_ORIGIN_ZEROCOPY 5 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) +#define SO_EE_CODE_ZEROCOPY_COPIED 1 + /** * struct scm_timestamping - timestamps exposed through cmsg * -- cgit From 76851d1212c11365362525e1e2c0a18c97478e6b Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:40 -0400 Subject: sock: add SOCK_ZEROCOPY sockopt The send call ignores unknown flags. Legacy applications may already unwittingly pass MSG_ZEROCOPY. Continue to ignore this flag unless a socket opts in to zerocopy. Introduce socket option SO_ZEROCOPY to enable MSG_ZEROCOPY processing. Processes can also query this socket option to detect kernel support for the feature. Older kernels will return ENOPROTOOPT. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/uapi/asm-generic/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 9861be8da65e..e47c9e436221 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -104,4 +104,6 @@ #define SO_PEERGROUPS 59 +#define SO_ZEROCOPY 60 + #endif /* __ASM_GENERIC_SOCKET_H */ -- cgit From 1f8b977ab32dc5d148f103326e80d9097f1cefb5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:41 -0400 Subject: sock: enable MSG_ZEROCOPY Prepare the datapath for refcounted ubuf_info. Clone ubuf_info with skb_zerocopy_clone() wherever needed due to skb split, merge, resize or clone. Split skb_orphan_frags into two variants. The split, merge, .. paths support reference counted zerocopy buffers, so do not do a deep copy. Add skb_orphan_frags_rx for paths that may loop packets to receive sockets. That is not allowed, as it may cause unbounded latency. Deep copy all zerocopy copy buffers, ref-counted or not, in this path. The exact locations to modify were chosen by exhaustively searching through all code that might modify skb_frag references and/or the the SKBTX_DEV_ZEROCOPY tx_flags bit. The changes err on the safe side, in two ways. (1) legacy ubuf_info paths virtio and tap are not modified. They keep a 1:1 ubuf_info to sk_buff relationship. Calls to skb_orphan_frags still call skb_copy_ubufs and thus copy frags in this case. (2) not all copies deep in the stack are addressed yet. skb_shift, skb_split and skb_try_coalesce can be refined to avoid copying. These are not in the hot path and this patch is hairy enough as is, so that is left for future refinement. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 59cff7aa494e..e5387932c266 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2512,7 +2512,17 @@ static inline void skb_orphan(struct sk_buff *skb) */ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask) { - if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY))) + if (likely(!skb_zcopy(skb))) + return 0; + if (skb_uarg(skb)->callback == sock_zerocopy_callback) + return 0; + return skb_copy_ubufs(skb, gfp_mask); +} + +/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */ +static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask) +{ + if (likely(!skb_zcopy(skb))) return 0; return skb_copy_ubufs(skb, gfp_mask); } @@ -2944,6 +2954,8 @@ static inline int skb_add_data(struct sk_buff *skb, static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { + if (skb_zcopy(skb)) + return false; if (i) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; -- cgit From 4ab6c99d99bb1bf0fbba8ff4e52114c66109992f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:42 -0400 Subject: sock: MSG_ZEROCOPY notification coalescing In the simple case, each sendmsg() call generates data and eventually a zerocopy ready notification N, where N indicates the Nth successful invocation of sendmsg() with the MSG_ZEROCOPY flag on this socket. TCP and corked sockets can cause send() calls to append new data to an existing sk_buff and, thus, ubuf_info. In that case the notification must hold a range. odify ubuf_info to store a inclusive range [N..N+m] and add skb_zerocopy_realloc() to optionally extend an existing range. Also coalesce notifications in this common case: if a notification [1, 1] is about to be queued while [0, 0] is the queue tail, just modify the head of the queue to read [0, 1]. Coalescing is limited to a few TSO frames worth of data to bound notification latency. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e5387932c266..f5bdd93a87da 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -444,15 +444,26 @@ enum { */ struct ubuf_info { void (*callback)(struct ubuf_info *, bool zerocopy_success); - void *ctx; - unsigned long desc; - u16 zerocopy:1; + union { + struct { + unsigned long desc; + void *ctx; + }; + struct { + u32 id; + u16 len; + u16 zerocopy:1; + u32 bytelen; + }; + }; atomic_t refcnt; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size); +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg); static inline void sock_zerocopy_get(struct ubuf_info *uarg) { -- cgit From a91dbff551a6f1865b68fa82b654591490b59901 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 3 Aug 2017 16:29:43 -0400 Subject: sock: ulimit on MSG_ZEROCOPY pages Bound the number of pages that a user may pin. Follow the lead of perf tools to maintain a per-user bound on memory locked pages commit 789f90fcf6b0 ("perf_counter: per user mlock gift") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/sched/user.h | 3 ++- include/linux/skbuff.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 5d5415e129d4..3c07e4135127 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -36,7 +36,8 @@ struct user_struct { struct hlist_node uidhash_node; kuid_t uid; -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ + defined(CONFIG_NET) atomic_long_t locked_vm; #endif }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f5bdd93a87da..8c0708d2e5e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -457,6 +457,11 @@ struct ubuf_info { }; }; atomic_t refcnt; + + struct mmpin { + struct user_struct *user; + unsigned int num_pg; + } mmp; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) -- cgit From 999616b8536cf3b9a1d0d74d5542ea009df482ff Mon Sep 17 00:00:00 2001 From: Jerome Forissier Date: Wed, 31 May 2017 13:21:05 +0200 Subject: tee: add forward declaration for struct device tee_drv.h references struct device, but does not include device.h nor platform_device.h. Therefore, if tee_drv.h is included by some file that does not pull device.h nor platform_device.h beforehand, we have a compile warning. Fix this by adding a forward declaration. Signed-off-by: Jerome Forissier Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 0f175b8f6456..cb889afe576b 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -28,6 +28,7 @@ #define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ #define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ +struct device; struct tee_device; struct tee_shm; struct tee_shm_pool; -- cgit From 059cf566e123ca7eb7434285c6455d7afafb4e02 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 16 Feb 2017 09:07:02 +0100 Subject: tee: indicate privileged dev in gen_caps Mirrors the TEE_DESC_PRIVILEGED bit of struct tee_desc:flags into struct tee_ioctl_version_data:gen_caps as TEE_GEN_CAP_PRIVILEGED in tee_ioctl_version() Reviewed-by: Jerome Forissier Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index 370d8845ab21..688782e90140 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -49,6 +49,7 @@ #define TEE_MAX_ARG_SIZE 1024 #define TEE_GEN_CAP_GP (1 << 0)/* GlobalPlatform compliant TEE */ +#define TEE_GEN_CAP_PRIVILEGED (1 << 1)/* Privileged device (for supplicant) */ /* * TEE Implementation ID -- cgit From 30d8340b5857550007ab6fe9744e4384dfa6e55c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 3 Aug 2017 20:20:42 +0900 Subject: ALSA: control: obsolete user_ctl_lock At a previous commit, concurrent requests for TLV data are maintained exclusively between read requests and write/command requests. TLV callback handlers in each driver has no risk from concurrent access for reference/change. In current implementation, 'struct snd_card' has a mutex to control concurrent accesses to user-defined element sets. This commit obsoletes it. Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- include/sound/core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 55385588eefa..357f36b5ee80 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -118,8 +118,6 @@ struct snd_card { int user_ctl_count; /* count of all user controls */ struct list_head controls; /* all controls for this card */ struct list_head ctl_files; /* active control files */ - struct mutex user_ctl_lock; /* protects user controls against - concurrent access */ struct snd_info_entry *proc_root; /* root for soundcard specific files */ struct snd_info_entry *proc_id; /* the card id */ -- cgit From 31128822ce46eda32b69a3f5ff5d29b05bc1facb Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 31 Jul 2017 13:38:31 +0200 Subject: clk: meson8b: expose every clock in the bindings Expose all clocks which maybe used as DT bindings Only clock ids internal the controller remain un-exposed (none on this particular controller at the moment) Acked-by: Neil Armstrong Acked-by: Arnd Bergmann Signed-off-by: Jerome Brunet Signed-off-by: Neil Armstrong --- include/dt-bindings/clock/meson8b-clkc.h | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/meson8b-clkc.h b/include/dt-bindings/clock/meson8b-clkc.h index e29227fb52a1..a9c0306330b6 100644 --- a/include/dt-bindings/clock/meson8b-clkc.h +++ b/include/dt-bindings/clock/meson8b-clkc.h @@ -21,15 +21,85 @@ #define CLKID_ZERO 13 #define CLKID_MPEG_SEL 14 #define CLKID_MPEG_DIV 15 +#define CLKID_DDR 16 +#define CLKID_DOS 17 +#define CLKID_ISA 18 +#define CLKID_PL301 19 +#define CLKID_PERIPHS 20 +#define CLKID_SPICC 21 +#define CLKID_I2C 22 #define CLKID_SAR_ADC 23 +#define CLKID_SMART_CARD 24 #define CLKID_RNG0 25 +#define CLKID_UART0 26 +#define CLKID_SDHC 27 +#define CLKID_STREAM 28 +#define CLKID_ASYNC_FIFO 29 #define CLKID_SDIO 30 +#define CLKID_ABUF 31 +#define CLKID_HIU_IFACE 32 +#define CLKID_ASSIST_MISC 33 +#define CLKID_SPI 34 +#define CLKID_I2S_SPDIF 35 #define CLKID_ETH 36 +#define CLKID_DEMUX 37 +#define CLKID_AIU_GLUE 38 +#define CLKID_IEC958 39 +#define CLKID_I2S_OUT 40 +#define CLKID_AMCLK 41 +#define CLKID_AIFIFO2 42 +#define CLKID_MIXER 43 +#define CLKID_MIXER_IFACE 44 +#define CLKID_ADC 45 +#define CLKID_BLKMV 46 +#define CLKID_AIU 47 +#define CLKID_UART1 48 +#define CLKID_G2D 49 #define CLKID_USB0 50 #define CLKID_USB1 51 +#define CLKID_RESET 52 +#define CLKID_NAND 53 +#define CLKID_DOS_PARSER 54 #define CLKID_USB 55 +#define CLKID_VDIN1 56 +#define CLKID_AHB_ARB0 57 +#define CLKID_EFUSE 58 +#define CLKID_BOOT_ROM 59 +#define CLKID_AHB_DATA_BUS 60 +#define CLKID_AHB_CTRL_BUS 61 +#define CLKID_HDMI_INTR_SYNC 62 +#define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_MMC_PCLK 66 +#define CLKID_DVIN 67 +#define CLKID_UART2 68 #define CLKID_SANA 69 +#define CLKID_VPU_INTR 70 +#define CLKID_SEC_AHB_AHB3_BRIDGE 71 +#define CLKID_CLK81_A9 72 +#define CLKID_VCLK2_VENCI0 73 +#define CLKID_VCLK2_VENCI1 74 +#define CLKID_VCLK2_VENCP0 75 +#define CLKID_VCLK2_VENCP1 76 +#define CLKID_GCLK_VENCI_INT 77 +#define CLKID_GCLK_VENCP_INT 78 +#define CLKID_DAC_CLK 79 +#define CLKID_AOCLK_GATE 80 +#define CLKID_IEC958_GATE 81 +#define CLKID_ENC480P 82 +#define CLKID_RNG1 83 +#define CLKID_GCLK_VENCL_INT 84 +#define CLKID_VCLK2_VENCLMCC 85 +#define CLKID_VCLK2_VENCL 86 +#define CLKID_VCLK2_OTHER 87 +#define CLKID_EDP 88 +#define CLKID_AO_MEDIA_CPU 89 +#define CLKID_AO_AHB_SRAM 90 +#define CLKID_AO_AHB_BUS 91 +#define CLKID_AO_IFACE 92 +#define CLKID_MPLL0 93 +#define CLKID_MPLL1 94 +#define CLKID_MPLL2 95 #endif /* __MESON8B_CLKC_H */ -- cgit From 90640fd05ef27c7feb9da503ca6e14f16911bd6f Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 31 Jul 2017 13:38:32 +0200 Subject: clk: meson-gxbb: expose almost every clock in the bindings Expose all clocks which maybe used as DT bindings Only clock ids internal the controller remain un-exposed Acked-by: Neil Armstrong Acked-by: Arnd Bergmann Signed-off-by: Jerome Brunet Signed-off-by: Neil Armstrong --- include/dt-bindings/clock/gxbb-clkc.h | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index e3e9f7919c31..e07fea011ebd 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -5,37 +5,96 @@ #ifndef __GXBB_CLKC_H #define __GXBB_CLKC_H +#define CLKID_SYS_PLL 0 #define CLKID_HDMI_PLL 2 +#define CLKID_FIXED_PLL 3 #define CLKID_FCLK_DIV2 4 #define CLKID_FCLK_DIV3 5 #define CLKID_FCLK_DIV4 6 +#define CLKID_FCLK_DIV5 7 +#define CLKID_FCLK_DIV7 8 #define CLKID_GP0_PLL 9 #define CLKID_CLK81 12 +#define CLKID_MPLL0 13 +#define CLKID_MPLL1 14 #define CLKID_MPLL2 15 +#define CLKID_DDR 16 +#define CLKID_DOS 17 +#define CLKID_ISA 18 +#define CLKID_PL301 19 +#define CLKID_PERIPHS 20 #define CLKID_SPICC 21 #define CLKID_I2C 22 #define CLKID_SAR_ADC 23 +#define CLKID_SMART_CARD 24 #define CLKID_RNG0 25 #define CLKID_UART0 26 +#define CLKID_SDHC 27 +#define CLKID_STREAM 28 +#define CLKID_ASYNC_FIFO 29 +#define CLKID_SDIO 30 +#define CLKID_ABUF 31 +#define CLKID_HIU_IFACE 32 +#define CLKID_ASSIST_MISC 33 #define CLKID_SPI 34 #define CLKID_ETH 36 +#define CLKID_I2S_SPDIF 35 +#define CLKID_DEMUX 37 #define CLKID_AIU_GLUE 38 #define CLKID_IEC958 39 #define CLKID_I2S_OUT 40 +#define CLKID_AMCLK 41 +#define CLKID_AIFIFO2 42 +#define CLKID_MIXER 43 #define CLKID_MIXER_IFACE 44 +#define CLKID_ADC 45 +#define CLKID_BLKMV 46 #define CLKID_AIU 47 #define CLKID_UART1 48 +#define CLKID_G2D 49 #define CLKID_USB0 50 #define CLKID_USB1 51 +#define CLKID_RESET 52 +#define CLKID_NAND 53 +#define CLKID_DOS_PARSER 54 #define CLKID_USB 55 +#define CLKID_VDIN1 56 +#define CLKID_AHB_ARB0 57 +#define CLKID_EFUSE 58 +#define CLKID_BOOT_ROM 59 +#define CLKID_AHB_DATA_BUS 60 +#define CLKID_AHB_CTRL_BUS 61 +#define CLKID_HDMI_INTR_SYNC 62 #define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_MMC_PCLK 66 +#define CLKID_DVIN 67 #define CLKID_UART2 68 #define CLKID_SANA 69 +#define CLKID_VPU_INTR 70 +#define CLKID_SEC_AHB_AHB3_BRIDGE 71 +#define CLKID_CLK81_A53 72 +#define CLKID_VCLK2_VENCI0 73 +#define CLKID_VCLK2_VENCI1 74 +#define CLKID_VCLK2_VENCP0 75 +#define CLKID_VCLK2_VENCP1 76 #define CLKID_GCLK_VENCI_INT0 77 +#define CLKID_GCLK_VENCI_INT 78 +#define CLKID_DAC_CLK 79 #define CLKID_AOCLK_GATE 80 #define CLKID_IEC958_GATE 81 +#define CLKID_ENC480P 82 +#define CLKID_RNG1 83 +#define CLKID_GCLK_VENCI_INT1 84 +#define CLKID_VCLK2_VENCLMCC 85 +#define CLKID_VCLK2_VENCL 86 +#define CLKID_VCLK_OTHER 87 +#define CLKID_EDP 88 +#define CLKID_AO_MEDIA_CPU 89 +#define CLKID_AO_AHB_SRAM 90 +#define CLKID_AO_AHB_BUS 91 +#define CLKID_AO_IFACE 92 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 @@ -50,5 +109,6 @@ #define CLKID_CTS_AMCLK 107 #define CLKID_CTS_MCLK_I958 110 #define CLKID_CTS_I958 113 +#define CLKID_32K_CLK 114 #endif /* __GXBB_CLKC_H */ -- cgit From a5841de6911fb7759514325869d11c6c9d7df51a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 31 Jul 2017 13:56:02 +0200 Subject: clk: meson: gxbb: Add sd_emmc clk0 clkids Add the clkids for the clocks feeding the input0 of the mmc controllers Signed-off-by: Jerome Brunet Signed-off-by: Neil Armstrong --- include/dt-bindings/clock/gxbb-clkc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index e07fea011ebd..c04a76d8facf 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -110,5 +110,8 @@ #define CLKID_CTS_MCLK_I958 110 #define CLKID_CTS_I958 113 #define CLKID_32K_CLK 114 +#define CLKID_SD_EMMC_A_CLK0 119 +#define CLKID_SD_EMMC_B_CLK0 122 +#define CLKID_SD_EMMC_C_CLK0 125 #endif /* __GXBB_CLKC_H */ -- cgit From 596f2b78da7b658b5e7758d18e8135401ca12fad Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 1 Aug 2017 13:56:58 +0200 Subject: dt-bindings: clock: gxbb-aoclk: Add CEC 32k clock This patchadds the clock binding entry for the CEC 32K AO Clock. Signed-off-by: Neil Armstrong --- include/dt-bindings/clock/gxbb-aoclkc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/gxbb-aoclkc.h b/include/dt-bindings/clock/gxbb-aoclkc.h index 31751482d13c..9d15e2221fdb 100644 --- a/include/dt-bindings/clock/gxbb-aoclkc.h +++ b/include/dt-bindings/clock/gxbb-aoclkc.h @@ -62,5 +62,6 @@ #define CLKID_AO_UART1 3 #define CLKID_AO_UART2 4 #define CLKID_AO_IR_BLASTER 5 +#define CLKID_AO_CEC_32K 6 #endif -- cgit From 4ebc1e3cfcd8778e2150bdb799b19e85348b8efa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:57 +0200 Subject: net: sched: remove unneeded tcf_em_tree_change Since tcf_em_tree_validate could be always called on a newly created filter, there is no need for this change function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 537d0a0ad4c4..f4462ec8b2f4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -332,26 +332,6 @@ int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int); int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, struct tcf_pkt_info *); -/** - * tcf_em_tree_change - replace ematch tree of a running classifier - * - * @tp: classifier kind handle - * @dst: destination ematch tree variable - * @src: source ematch tree (temporary tree from tcf_em_tree_validate) - * - * This functions replaces the ematch tree in @dst with the ematch - * tree in @src. The classifier in charge of the ematch tree may be - * running. - */ -static inline void tcf_em_tree_change(struct tcf_proto *tp, - struct tcf_ematch_tree *dst, - struct tcf_ematch_tree *src) -{ - tcf_tree_lock(tp); - memcpy(dst, src, sizeof(*dst)); - tcf_tree_unlock(tp); -} - /** * tcf_em_tree_match - evaulate an ematch tree * @@ -386,7 +366,6 @@ struct tcf_ematch_tree { #define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0) #define tcf_em_tree_destroy(t) do { (void)(t); } while(0) #define tcf_em_tree_dump(skb, t, tlv) (0) -#define tcf_em_tree_change(tp, dst, src) do { } while(0) #define tcf_em_tree_match(skb, t, info) ((void)(info), 1) #endif /* CONFIG_NET_EMATCH */ -- cgit From 3bcc0cec818fa969fe555b44443347211ed787a3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:58 +0200 Subject: net: sched: change names of action number helpers to be aligned with the rest The rest of the helpers are named tcf_exts_*, so change the name of the action number helpers to be aligned. While at it, change to inline functions. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f4462ec8b2f4..7f2563636df0 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -199,17 +199,35 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return 0; } +/** + * tcf_exts_has_actions - check if at least one action is present + * @exts: tc filter extensions handle + * + * Returns true if at least one action is present. + */ +static inline bool tcf_exts_has_actions(struct tcf_exts *exts) +{ #ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions; +#else + return false; +#endif +} -#define tc_no_actions(_exts) ((_exts)->nr_actions == 0) -#define tc_single_action(_exts) ((_exts)->nr_actions == 1) - -#else /* CONFIG_NET_CLS_ACT */ - -#define tc_no_actions(_exts) true -#define tc_single_action(_exts) false - -#endif /* CONFIG_NET_CLS_ACT */ +/** + * tcf_exts_has_one_action - check if exactly one action is present + * @exts: tc filter extensions handle + * + * Returns true if exactly one action is present. + */ +static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + return exts->nr_actions == 1; +#else + return false; +#endif +} int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, -- cgit From af69afc551eb9f9b1f2cc3295e2dfcdaa1dc948d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:28:59 +0200 Subject: net: sched: use tcf_exts_has_actions in tcf_exts_exec Use the tcf_exts_has_actions helper instead or directly testing exts->nr_actions in tcf_exts_exec. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 7f2563636df0..322a2823cc6a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -176,29 +176,6 @@ tcf_exts_stats_update(const struct tcf_exts *exts, #endif } -/** - * tcf_exts_exec - execute tc filter extensions - * @skb: socket buffer - * @exts: tc filter extensions handle - * @res: desired result - * - * Executes all configured extensions. Returns 0 on a normal execution, - * a negative number if the filter must be considered unmatched or - * a positive action code (TC_ACT_*) which must be returned to the - * underlying layer. - */ -static inline int -tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_result *res) -{ -#ifdef CONFIG_NET_CLS_ACT - if (exts->nr_actions) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); -#endif - return 0; -} - /** * tcf_exts_has_actions - check if at least one action is present * @exts: tc filter extensions handle @@ -229,6 +206,29 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) #endif } +/** + * tcf_exts_exec - execute tc filter extensions + * @skb: socket buffer + * @exts: tc filter extensions handle + * @res: desired result + * + * Executes all configured extensions. Returns 0 on a normal execution, + * a negative number if the filter must be considered unmatched or + * a positive action code (TC_ACT_*) which must be returned to the + * underlying layer. + */ +static inline int +tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, + struct tcf_result *res) +{ +#ifdef CONFIG_NET_CLS_ACT + if (tcf_exts_has_actions(exts)) + return tcf_action_exec(skb, exts->actions, exts->nr_actions, + res); +#endif + return 0; +} + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); -- cgit From 6fc6d06e5371507e68c6904a3423622b0e465b64 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:00 +0200 Subject: net: sched: remove redundant helpers tcf_exts_is_predicative and tcf_exts_is_available These two helpers are doing the same as tcf_exts_has_actions, so remove them and use tcf_exts_has_actions instead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 322a2823cc6a..817badf733b5 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) return 0; } -/** - * tcf_exts_is_predicative - check if a predicative extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if a predicative extension is present, i.e. an extension which - * might cause further actions and thus overrule the regular tcf_result. - */ -static inline int -tcf_exts_is_predicative(struct tcf_exts *exts) -{ -#ifdef CONFIG_NET_CLS_ACT - return exts->nr_actions; -#else - return 0; -#endif -} - -/** - * tcf_exts_is_available - check if at least one extension is present - * @exts: tc filter extensions handle - * - * Returns 1 if at least one extension is present. - */ -static inline int -tcf_exts_is_available(struct tcf_exts *exts) -{ - /* All non-predicative extensions must be added here. */ - return tcf_exts_is_predicative(exts); -} - static inline void tcf_exts_to_list(const struct tcf_exts *exts, struct list_head *actions) { -- cgit From af089e701adfb5898fee00a56ea4bb421edc308d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:01 +0200 Subject: net: sched: fix return value of tcf_exts_exec Return the defined TC_ACT_OK instead of 0. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 817badf733b5..61ce521688b2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -182,7 +182,7 @@ static inline bool tcf_exts_has_one_action(struct tcf_exts *exts) * @exts: tc filter extensions handle * @res: desired result * - * Executes all configured extensions. Returns 0 on a normal execution, + * Executes all configured extensions. Returns TC_ACT_OK on a normal execution, * a negative number if the filter must be considered unmatched or * a positive action code (TC_ACT_*) which must be returned to the * underlying layer. @@ -196,7 +196,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif - return 0; + return TC_ACT_OK; } int tcf_exts_validate(struct net *net, struct tcf_proto *tp, -- cgit From ec1a9cca0e13391167567964fd04e61a39d6a4ae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:02 +0200 Subject: net: sched: remove check for number of actions in tcf_exts_exec Leave it to tcf_action_exec to return TC_ACT_OK in case there is no action present. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 61ce521688b2..b8959c9a190d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -192,9 +192,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (tcf_exts_has_actions(exts)) - return tcf_action_exec(skb, exts->actions, exts->nr_actions, - res); + return tcf_action_exec(skb, exts->actions, exts->nr_actions, res); #endif return TC_ACT_OK; } -- cgit From 9b0d4446b56904b59ae3809913b0ac760fa941a6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 4 Aug 2017 14:29:15 +0200 Subject: net: sched: avoid atomic swap in tcf_exts_change tcf_exts_change is always called on newly created exts, which are not used on fastpath. Therefore, simple struct copy is enough. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b8959c9a190d..e0c54f111467 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -201,8 +201,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); void tcf_exts_destroy(struct tcf_exts *exts); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src); +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, -- cgit From 56ce097c1caede1f9c191a7c9699b950e7c36ad9 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 4 Aug 2017 08:24:05 -0700 Subject: net: comment fixes against BPF devmap helper calls Update BPF comments to accurately reflect XDP usage. Fixes: 97f91a7cf04ff ("bpf: add bpf_redirect_map helper routine") Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1106a8c4cd36..1d06be1569b1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -345,14 +345,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT - * int bpf_redirect_map(key, map, flags) + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) * redirect to endpoint in map + * @map: pointer to dev map * @key: index in map to lookup - * @map: fd of map to do lookup in * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid -- cgit From 8eede5bc4ef04281fbba7ddfe157052d0e76075d Mon Sep 17 00:00:00 2001 From: Nate Watterson Date: Thu, 20 Jul 2017 15:26:24 -0400 Subject: ata: ahci_platform: Add shutdown handler The newly introduced ahci_platform_shutdown() method is called during system shutdown to disable host controller DMA and interrupts in order to avoid potentially corrupting or otherwise interfering with a new kernel being started with kexec. Signed-off-by: Nate Watterson Signed-off-by: Tejun Heo --- include/linux/ahci_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index a270f25ee7c7..1b0a17b22cd3 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -36,6 +36,8 @@ int ahci_platform_init_host(struct platform_device *pdev, const struct ata_port_info *pi_template, struct scsi_host_template *sht); +void ahci_platform_shutdown(struct platform_device *pdev); + int ahci_platform_suspend_host(struct device *dev); int ahci_platform_resume_host(struct device *dev); int ahci_platform_suspend(struct device *dev); -- cgit From e870c6c87cf9484090d28f2a68aa29e008960c93 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jul 2017 23:43:18 +0200 Subject: ACPI / PM: Prefer suspend-to-idle over S3 on some systems Modify the ACPI system sleep support setup code to select suspend-to-idle as the default system sleep state if (1) the ACPI_FADT_LOW_POWER_S0 flag is set in the FADT and (2) the Low Power Idle S0 _DSM interface has been discovered and (3) the default sleep state was not selected from the kernel command line. The main motivation for this change is that systems where the (1) and (2) conditions are met typically ship with OSes that don't exercise the S3 path in the platform firmware which remains untested and turns out to be non-functional at least in some cases. Signed-off-by: Rafael J. Wysocki Tested-by: Mario Limonciello --- include/linux/suspend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 97e394feabdb..8c3b0b1e6786 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -196,6 +196,9 @@ struct platform_freeze_ops { }; #ifdef CONFIG_SUSPEND +extern suspend_state_t mem_sleep_current; +extern suspend_state_t mem_sleep_default; + /** * suspend_set_ops - set platform dependent suspend operations * @ops: The new suspend operations to set. -- cgit From 401c0a19c6c22efcaff85d5a64a396f9130da2ca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 4 Aug 2017 17:20:16 -0700 Subject: nfit, libnvdimm, region: export 'position' in mapping info It is useful to be able to know the position of a DIMM in an interleave-set. Consider the case where the order of the DIMMs changes causing a namespace to be invalidated because the interleave-set cookie no longer matches. If the before and after state of each DIMM position is known this state debugged by the system owner. Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f3d3e6af8838..9b8d81a7b80e 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -87,6 +87,7 @@ struct nd_mapping_desc { struct nvdimm *nvdimm; u64 start; u64 size; + int position; }; struct nd_region_desc { -- cgit From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 6 Aug 2017 18:44:27 +0200 Subject: uapi linux/kfd_ioctl.h: only use __u32 and __u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include instead of which on Linux includes and on non-Linux platforms defines __u32 etc types. Fixes user space compilation errors like: linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’ uint32_t major_version; /* from KFD */ ^~~~~~~~ Signed-off-by: Mikko Rapeli Acked-by: Arnd Bergmann Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 172 ++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 7b4567bacfc2..26283fefdf5f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -23,15 +23,15 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED -#include +#include #include #define KFD_IOCTL_MAJOR_VERSION 1 #define KFD_IOCTL_MINOR_VERSION 1 struct kfd_ioctl_get_version_args { - uint32_t major_version; /* from KFD */ - uint32_t minor_version; /* from KFD */ + __u32 major_version; /* from KFD */ + __u32 minor_version; /* from KFD */ }; /* For kfd_ioctl_create_queue_args.queue_type. */ @@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args { #define KFD_MAX_QUEUE_PRIORITY 15 struct kfd_ioctl_create_queue_args { - uint64_t ring_base_address; /* to KFD */ - uint64_t write_pointer_address; /* from KFD */ - uint64_t read_pointer_address; /* from KFD */ - uint64_t doorbell_offset; /* from KFD */ - - uint32_t ring_size; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t queue_type; /* to KFD */ - uint32_t queue_percentage; /* to KFD */ - uint32_t queue_priority; /* to KFD */ - uint32_t queue_id; /* from KFD */ - - uint64_t eop_buffer_address; /* to KFD */ - uint64_t eop_buffer_size; /* to KFD */ - uint64_t ctx_save_restore_address; /* to KFD */ - uint64_t ctx_save_restore_size; /* to KFD */ + __u64 ring_base_address; /* to KFD */ + __u64 write_pointer_address; /* from KFD */ + __u64 read_pointer_address; /* from KFD */ + __u64 doorbell_offset; /* from KFD */ + + __u32 ring_size; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 queue_type; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* from KFD */ + + __u64 eop_buffer_address; /* to KFD */ + __u64 eop_buffer_size; /* to KFD */ + __u64 ctx_save_restore_address; /* to KFD */ + __u64 ctx_save_restore_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { - uint32_t queue_id; /* to KFD */ - uint32_t pad; + __u32 queue_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_update_queue_args { - uint64_t ring_base_address; /* to KFD */ + __u64 ring_base_address; /* to KFD */ - uint32_t queue_id; /* to KFD */ - uint32_t ring_size; /* to KFD */ - uint32_t queue_percentage; /* to KFD */ - uint32_t queue_priority; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 ring_size; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args { #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 struct kfd_ioctl_set_memory_policy_args { - uint64_t alternate_aperture_base; /* to KFD */ - uint64_t alternate_aperture_size; /* to KFD */ + __u64 alternate_aperture_base; /* to KFD */ + __u64 alternate_aperture_size; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t default_policy; /* to KFD */ - uint32_t alternate_policy; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 default_policy; /* to KFD */ + __u32 alternate_policy; /* to KFD */ + __u32 pad; }; /* @@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args { */ struct kfd_ioctl_get_clock_counters_args { - uint64_t gpu_clock_counter; /* from KFD */ - uint64_t cpu_clock_counter; /* from KFD */ - uint64_t system_clock_counter; /* from KFD */ - uint64_t system_clock_freq; /* from KFD */ + __u64 gpu_clock_counter; /* from KFD */ + __u64 cpu_clock_counter; /* from KFD */ + __u64 system_clock_counter; /* from KFD */ + __u64 system_clock_freq; /* from KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; #define NUM_OF_SUPPORTED_GPUS 7 struct kfd_process_device_apertures { - uint64_t lds_base; /* from KFD */ - uint64_t lds_limit; /* from KFD */ - uint64_t scratch_base; /* from KFD */ - uint64_t scratch_limit; /* from KFD */ - uint64_t gpuvm_base; /* from KFD */ - uint64_t gpuvm_limit; /* from KFD */ - uint32_t gpu_id; /* from KFD */ - uint32_t pad; + __u64 lds_base; /* from KFD */ + __u64 lds_limit; /* from KFD */ + __u64 scratch_base; /* from KFD */ + __u64 scratch_limit; /* from KFD */ + __u64 gpuvm_base; /* from KFD */ + __u64 gpuvm_limit; /* from KFD */ + __u32 gpu_id; /* from KFD */ + __u32 pad; }; struct kfd_ioctl_get_process_apertures_args { @@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args { process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ - uint32_t num_of_nodes; - uint32_t pad; + __u32 num_of_nodes; + __u32 pad; }; #define MAX_ALLOWED_NUM_POINTS 100 @@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args { #define MAX_ALLOWED_WAC_BUFF_SIZE 128 struct kfd_ioctl_dbg_register_args { - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_unregister_args { - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_address_watch_args { - uint64_t content_ptr; /* a pointer to the actual content */ - uint32_t gpu_id; /* to KFD */ - uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; struct kfd_ioctl_dbg_wave_control_args { - uint64_t content_ptr; /* a pointer to the actual content */ - uint32_t gpu_id; /* to KFD */ - uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; /* Matching HSA_EVENTTYPE */ @@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_SIGNAL_EVENT_LIMIT 256 struct kfd_ioctl_create_event_args { - uint64_t event_page_offset; /* from KFD */ - uint32_t event_trigger_data; /* from KFD - signal events only */ - uint32_t event_type; /* to KFD */ - uint32_t auto_reset; /* to KFD */ - uint32_t node_id; /* to KFD - only valid for certain + __u64 event_page_offset; /* from KFD */ + __u32 event_trigger_data; /* from KFD - signal events only */ + __u32 event_type; /* to KFD */ + __u32 auto_reset; /* to KFD */ + __u32 node_id; /* to KFD - only valid for certain event types */ - uint32_t event_id; /* from KFD */ - uint32_t event_slot_index; /* from KFD */ + __u32 event_id; /* from KFD */ + __u32 event_slot_index; /* from KFD */ }; struct kfd_ioctl_destroy_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_set_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_reset_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_memory_exception_failure { - uint32_t NotPresent; /* Page not present or supervisor privilege */ - uint32_t ReadOnly; /* Write access to a read-only page */ - uint32_t NoExecute; /* Execute access to a page marked NX */ - uint32_t pad; + __u32 NotPresent; /* Page not present or supervisor privilege */ + __u32 ReadOnly; /* Write access to a read-only page */ + __u32 NoExecute; /* Execute access to a page marked NX */ + __u32 pad; }; /* memory exception data*/ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; - uint64_t va; - uint32_t gpu_id; - uint32_t pad; + __u64 va; + __u32 gpu_id; + __u32 pad; }; /* Event data*/ @@ -217,19 +217,19 @@ struct kfd_event_data { union { struct kfd_hsa_memory_exception_data memory_exception_data; }; /* From KFD */ - uint64_t kfd_event_data_ext; /* pointer to an extension structure + __u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */ - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_wait_events_args { - uint64_t events_ptr; /* pointed to struct + __u64 events_ptr; /* pointed to struct kfd_event_data array, to KFD */ - uint32_t num_events; /* to KFD */ - uint32_t wait_for_all; /* to KFD */ - uint32_t timeout; /* to KFD */ - uint32_t wait_result; /* from KFD */ + __u32 num_events; /* to KFD */ + __u32 wait_for_all; /* to KFD */ + __u32 timeout; /* to KFD */ + __u32 wait_result; /* from KFD */ }; struct kfd_ioctl_set_scratch_backing_va_args { -- cgit From cbbd6c2f551c27199c90f680851609f888a3048b Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Wed, 2 Aug 2017 16:28:39 +0800 Subject: clk: rockchip: add more clk ids for rv1108 Add new clk ids for the peripherals on rv1108 soc. Signed-off-by: Elaine Zhang Signed-off-by: Andy Yan Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rv1108-cru.h | 93 +++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h index ae26f8105914..fe013cf51e2b 100644 --- a/include/dt-bindings/clock/rv1108-cru.h +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -43,12 +43,73 @@ #define SCLK_SDMMC_SAMPLE 84 #define SCLK_SDIO_SAMPLE 85 #define SCLK_EMMC_SAMPLE 86 +#define SCLK_VENC_CORE 87 +#define SCLK_HEVC_CORE 88 +#define SCLK_HEVC_CABAC 89 +#define SCLK_PWM0_PMU 90 +#define SCLK_I2C0_PMU 91 +#define SCLK_WIFI 92 +#define SCLK_CIFOUT 93 +#define SCLK_MIPI_CSI_OUT 94 +#define SCLK_CIF0 95 +#define SCLK_CIF1 96 +#define SCLK_CIF2 97 +#define SCLK_CIF3 98 +#define SCLK_DSP 99 +#define SCLK_DSP_IOP 100 +#define SCLK_DSP_EPP 101 +#define SCLK_DSP_EDP 102 +#define SCLK_DSP_EDAP 103 +#define SCLK_CVBS_HOST 104 +#define SCLK_HDMI_SFR 105 +#define SCLK_HDMI_CEC 106 +#define SCLK_CRYPTO 107 +#define SCLK_SPI 108 +#define SCLK_SARADC 109 +#define SCLK_TSADC 110 +#define SCLK_MACPHY_PRE 111 +#define SCLK_MACPHY 112 +#define SCLK_MACPHY_RX 113 +#define SCLK_MAC_REF 114 +#define SCLK_MAC_REFOUT 115 +#define SCLK_DSP_PFM 116 +#define SCLK_RGA 117 +#define SCLK_I2C1 118 +#define SCLK_I2C2 119 +#define SCLK_I2C3 120 +#define SCLK_PWM 121 +#define SCLK_ISP 122 +#define SCLK_USBPHY 123 +#define SCLK_I2S0_SRC 124 +#define SCLK_I2S1_SRC 125 +#define SCLK_I2S2_SRC 126 +#define SCLK_UART0_SRC 127 +#define SCLK_UART1_SRC 128 +#define SCLK_UART2_SRC 129 + +#define DCLK_VOP_SRC 185 +#define DCLK_HDMIPHY 186 +#define DCLK_VOP 187 /* aclk gates */ #define ACLK_DMAC 192 #define ACLK_PRE 193 #define ACLK_CORE 194 #define ACLK_ENMCORE 195 +#define ACLK_RKVENC 196 +#define ACLK_RKVDEC 197 +#define ACLK_VPU 198 +#define ACLK_CIF0 199 +#define ACLK_VIO0 200 +#define ACLK_VIO1 201 +#define ACLK_VOP 202 +#define ACLK_IEP 203 +#define ACLK_RGA 204 +#define ACLK_ISP 205 +#define ACLK_CIF1 206 +#define ACLK_CIF2 207 +#define ACLK_CIF3 208 +#define ACLK_PERI 209 /* pclk gates */ #define PCLK_GPIO1 256 @@ -67,6 +128,19 @@ #define PCLK_PWM 269 #define PCLK_TIMER 270 #define PCLK_PERI 271 +#define PCLK_GPIO0_PMU 272 +#define PCLK_I2C0_PMU 273 +#define PCLK_PWM0_PMU 274 +#define PCLK_ISP 275 +#define PCLK_VIO 276 +#define PCLK_MIPI_DSI 277 +#define PCLK_HDMI_CTRL 278 +#define PCLK_SARADC 279 +#define PCLK_DSP_CFG 280 +#define PCLK_BUS 281 +#define PCLK_EFUSE0 282 +#define PCLK_EFUSE1 283 +#define PCLK_WDT 284 /* hclk gates */ #define HCLK_I2S0_8CH 320 @@ -78,8 +152,25 @@ #define HCLK_EMMC 326 #define HCLK_PERI 327 #define HCLK_SFC 328 +#define HCLK_RKVENC 329 +#define HCLK_RKVDEC 330 +#define HCLK_CIF0 331 +#define HCLK_VIO 332 +#define HCLK_VOP 333 +#define HCLK_IEP 334 +#define HCLK_RGA 335 +#define HCLK_ISP 336 +#define HCLK_CRYPTO_MST 337 +#define HCLK_CRYPTO_SLV 338 +#define HCLK_HOST0 339 +#define HCLK_OTG 340 +#define HCLK_CIF1 341 +#define HCLK_CIF2 342 +#define HCLK_CIF3 343 +#define HCLK_BUS 344 +#define HCLK_VPU 345 -#define CLK_NR_CLKS (HCLK_SFC + 1) +#define CLK_NR_CLKS (HCLK_VPU + 1) /* reset id */ #define SRST_CORE_PO_AD 0 -- cgit From 1b6428a286ae3577b2ba3328a054115d561b8650 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Wed, 2 Aug 2017 16:29:48 +0800 Subject: clk: rockchip: rename the clk id for HCLK_I2S1_2CH i2s1 has 2 channels but not 8 channels. Signed-off-by: Elaine Zhang Signed-off-by: Andy Yan [and the clock id hasn't been used in either clock-driver nor dts, so is safe to rename] Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rv1108-cru.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h index fe013cf51e2b..e8aa6d56990a 100644 --- a/include/dt-bindings/clock/rv1108-cru.h +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -144,7 +144,7 @@ /* hclk gates */ #define HCLK_I2S0_8CH 320 -#define HCLK_I2S1_8CH 321 +#define HCLK_I2S1_2CH 321 #define HCLK_I2S2_2CH 322 #define HCLK_NANDC 323 #define HCLK_SDMMC 324 -- cgit From a376a4b0453ffac35ba6215dd1d1e53e37c5e810 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Wed, 2 Aug 2017 16:30:33 +0800 Subject: clk: rockchip: fix up indentation of some RV1108 clock-ids Make the code look better. Signed-off-by: Elaine Zhang Signed-off-by: Andy Yan Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rv1108-cru.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h index e8aa6d56990a..f269d833e41a 100644 --- a/include/dt-bindings/clock/rv1108-cru.h +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -173,16 +173,16 @@ #define CLK_NR_CLKS (HCLK_VPU + 1) /* reset id */ -#define SRST_CORE_PO_AD 0 +#define SRST_CORE_PO_AD 0 #define SRST_CORE_AD 1 #define SRST_L2_AD 2 -#define SRST_CPU_NIU_AD 3 +#define SRST_CPU_NIU_AD 3 #define SRST_CORE_PO 4 #define SRST_CORE 5 -#define SRST_L2 6 +#define SRST_L2 6 #define SRST_CORE_DBG 8 #define PRST_DBG 9 -#define RST_DAP 10 +#define RST_DAP 10 #define PRST_DBG_NIU 11 #define ARST_STRC_SYS_AD 15 @@ -249,9 +249,9 @@ #define HRST_SYSBUS 75 #define PRST_USBGRF 76 -#define ARST_PERIPH_NIU 80 -#define HRST_PERIPH_NIU 81 -#define PRST_PERIPH_NIU 82 +#define ARST_PERIPH_NIU 80 +#define HRST_PERIPH_NIU 81 +#define PRST_PERIPH_NIU 82 #define HRST_PERIPH 83 #define HRST_SDMMC 84 #define HRST_SDIO 85 @@ -269,7 +269,7 @@ #define HRST_HOST0_AUX 96 #define HRST_HOST0_ARB 97 #define SRST_HOST0_EHCIPHY 98 -#define SRST_HOST0_UTMI 99 +#define SRST_HOST0_UTMI 99 #define SRST_USBPOR 100 #define SRST_UTMI0 101 #define SRST_UTMI1 102 @@ -316,21 +316,21 @@ #define HRST_VPU_NIU 141 #define ARST_VPU 142 #define HRST_VPU 143 -#define ARST_RKVDEC_NIU 144 -#define HRST_RKVDEC_NIU 145 +#define ARST_RKVDEC_NIU 144 +#define HRST_RKVDEC_NIU 145 #define ARST_RKVDEC 146 #define HRST_RKVDEC 147 #define SRST_RKVDEC_CABAC 148 #define SRST_RKVDEC_CORE 149 -#define ARST_RKVENC_NIU 150 -#define HRST_RKVENC_NIU 151 +#define ARST_RKVENC_NIU 150 +#define HRST_RKVENC_NIU 151 #define ARST_RKVENC 152 #define HRST_RKVENC 153 #define SRST_RKVENC_CORE 154 #define SRST_DSP_CORE 156 #define SRST_DSP_SYS 157 -#define SRST_DSP_GLOBAL 158 +#define SRST_DSP_GLOBAL 158 #define SRST_DSP_OECM 159 #define PRST_DSP_IOP_NIU 160 #define ARST_DSP_EPP_NIU 161 @@ -348,7 +348,7 @@ #define SRST_PMU_I2C0 173 #define PRST_PMU_I2C0 174 #define PRST_PMU_GPIO0 175 -#define PRST_PMU_INTMEM 176 +#define PRST_PMU_INTMEM 176 #define PRST_PMU_PWM0 177 #define SRST_PMU_PWM0 178 #define PRST_PMU_GRF 179 -- cgit From 91ed1e666a4ea2e260452a7d7d311ac5ae852cba Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 3 Aug 2017 18:07:06 +0200 Subject: ip/options: explicitly provide net ns to __ip_options_echo() __ip_options_echo() uses the current network namespace, and currently retrives it via skb->dst->dev. This commit adds an explicit 'net' argument to __ip_options_echo() and update all the call sites to provide it, usually via a simpler sock_net(). After this change, __ip_options_echo() no more needs to access skb->dst and we can drop a couple of hack to preserve such info in the rx path. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/ip.h | 9 +++++---- include/net/tcp.h | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..9e59dcf1787a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -567,11 +567,12 @@ int ip_forward(struct sk_buff *skb); void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, - const struct ip_options *sopt); -static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) +int __ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb, const struct ip_options *sopt); +static inline int ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb) { - return __ip_options_echo(dopt, skb, &IPCB(skb)->opt); + return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt); } void ip_options_fragment(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index bb1881b4ce48..5173fecde495 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1885,7 +1885,8 @@ extern void tcp_rack_reo_timeout(struct sock *sk); /* * Save and compile IPv4 options, return a pointer to it */ -static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) +static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net, + struct sk_buff *skb) { const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct ip_options_rcu *dopt = NULL; @@ -1894,7 +1895,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) int opt_size = sizeof(*dopt) + opt->optlen; dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { kfree(dopt); dopt = NULL; } -- cgit From da4625ac2637e4e5249dc08a10f8dce7643603d2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:42 +0100 Subject: net: phy: split out PHY speed and duplex string generation Other code would like to make use of this, so make the speed and duplex string generation visible, and place it in a separate file. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0bb5b212ab42..e8264c78b75b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -667,6 +667,9 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; +const char *phy_speed_to_str(int speed); +const char *phy_duplex_to_str(unsigned int duplex); + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit From 0ccb4fc65d2799a315d5ee8732d75f35a114379c Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:47 +0100 Subject: net: phy: move phy_lookup_setting() and guts of phy_supported_speeds() to phy-core phy_lookup_setting() provides useful functionality in ethtool code outside phylib. Move it to phy-core and allow it to be re-used (eg, in phylink) rather than duplicated elsewhere. Note that this supports the larger linkmode space. As we move the phy settings table, we also need to move the guts of phy_supported_speeds() as well. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e8264c78b75b..8a280257778c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -670,6 +670,21 @@ struct phy_fixup { const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value + */ +struct phy_setting { + u32 speed; + u8 duplex; + u8 bit; +}; + +const struct phy_setting * +phy_lookup_setting(int speed, int duplex, const unsigned long *mask, + size_t maxbit, bool exact); +size_t phy_speeds(unsigned int *speeds, size_t size, + unsigned long *mask, size_t maxbit); + /** * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. -- cgit From a81497bee70eb15039594b3116913133aa9c9b29 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:02:58 +0100 Subject: net: phy: provide a hook for link up/link down events Sometimes, we need to do additional work between the PHY coming up and marking the carrier present - for example, we may need to wait for the PHY to MAC link to finish negotiation. This changes phylib to provide a notification function pointer which avoids the built-in netif_carrier_on() and netif_carrier_off() functions. Standard ->adjust_link functionality is provided by hooking a helper into the new ->phy_link_change method. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8a280257778c..0a5e8e62c9e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -474,6 +474,7 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; + void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier); void (*adjust_link)(struct net_device *dev); }; #define to_phy_device(d) container_of(to_mdio_device(d), \ -- cgit From 9525ae83959b60c6061fe2f2caabdc8f69a48bc6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:13 +0100 Subject: phylink: add phylink infrastructure The link between the ethernet MAC and its PHY has become more complex as the interface evolves. This is especially true with serdes links, where the part of the PHY is effectively integrated into the MAC. Serdes links can be connected to a variety of devices, including SFF modules soldered down onto the board with the MAC, a SFP cage with a hotpluggable SFP module which may contain a PHY or directly modulate the serdes signals onto optical media with or without a PHY, or even a classical PHY connection. Moreover, the negotiation information on serdes links comes in two varieties - SGMII mode, where the PHY provides its speed/duplex/flow control information to the MAC, and 1000base-X mode where both ends exchange their abilities and each resolve the link capabilities. This means we need a more flexible means to support these arrangements, particularly with the hotpluggable nature of SFP, where the PHY can be attached or detached after the network device has been brought up. Ethtool information can come from multiple sources: - we may have a PHY operating in either SGMII or 1000base-X mode, in which case we take ethtool/mii data directly from the PHY. - we may have a optical SFP module without a PHY, with the MAC operating in 1000base-X mode - the ethtool/mii data needs to come from the MAC. - we may have a copper SFP module with a PHY whic can't be accessed, which means we need to take ethtool/mii data from the MAC. Phylink aims to solve this by providing an intermediary between the MAC and PHY, providing a safe way for PHYs to be hotplugged, and allowing a SFP driver to reconfigure the serdes connection. Phylink also takes over support of fixed link connections, where the speed/duplex/flow control are fixed, but link status may be controlled by a GPIO signal. By avoiding the fixed-phy implementation, phylink can provide a faster response to link events: fixed-phy has to wait for phylib to operate its state machine, which can take several seconds. In comparison, phylink takes milliseconds. Signed-off-by: Russell King - remove sync status - rework supported and advertisment handling - add 1000base-x speed for fixed links - use functionality exported from phy-core, reworking __phylink_ethtool_ksettings_set for it Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 + include/linux/phylink.h | 145 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 include/linux/phylink.h (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 0a5e8e62c9e0..d78cd01ea513 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -182,6 +182,7 @@ static inline const char *phy_modes(phy_interface_t interface) #define MII_ADDR_C45 (1<<30) struct device; +struct phylink; struct sk_buff; /* @@ -469,6 +470,7 @@ struct phy_device { struct mutex lock; + struct phylink *phylink; struct net_device *attached_dev; u8 mdix; diff --git a/include/linux/phylink.h b/include/linux/phylink.h new file mode 100644 index 000000000000..76f054f39684 --- /dev/null +++ b/include/linux/phylink.h @@ -0,0 +1,145 @@ +#ifndef NETDEV_PCS_H +#define NETDEV_PCS_H + +#include +#include +#include + +struct device_node; +struct ethtool_cmd; +struct net_device; + +enum { + MLO_PAUSE_NONE, + MLO_PAUSE_ASYM = BIT(0), + MLO_PAUSE_SYM = BIT(1), + MLO_PAUSE_RX = BIT(2), + MLO_PAUSE_TX = BIT(3), + MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX, + MLO_PAUSE_AN = BIT(4), + + MLO_AN_PHY = 0, /* Conventional PHY */ + MLO_AN_FIXED, /* Fixed-link mode */ + MLO_AN_SGMII, /* Cisco SGMII protocol */ + MLO_AN_8023Z, /* 1000base-X protocol */ +}; + +static inline bool phylink_autoneg_inband(unsigned int mode) +{ + return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z; +} + +struct phylink_link_state { + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); + phy_interface_t interface; /* PHY_INTERFACE_xxx */ + int speed; + int duplex; + int pause; + unsigned int link:1; + unsigned int an_enabled:1; + unsigned int an_complete:1; +}; + +struct phylink_mac_ops { + /** + * validate: validate and update the link configuration + * @ndev: net_device structure associated with MAC + * @config: configuration to validate + * + * Update the %config->supported and %config->advertised masks + * clearing bits that can not be supported. + * + * Note: the PHY may be able to transform from one connection + * technology to another, so, eg, don't clear 1000BaseX just + * because the MAC is unable to support it. This is more about + * clearing unsupported speeds and duplex settings. + * + * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX + * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode + * based on %config->advertised and/or %config->speed. + */ + void (*validate)(struct net_device *ndev, unsigned long *supported, + struct phylink_link_state *state); + + /* Read the current link state from the hardware */ + int (*mac_link_state)(struct net_device *, struct phylink_link_state *); + + /* Configure the MAC */ + /** + * mac_config: configure the MAC for the selected mode and state + * @ndev: net_device structure for the MAC + * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII + * @state: state structure + * + * The action performed depends on the currently selected mode: + * + * %MLO_AN_FIXED, %MLO_AN_PHY: + * set the specified speed, duplex, pause mode, and phy interface + * mode in the provided @state. + * %MLO_AN_8023Z: + * place the link in 1000base-X mode, advertising the parameters + * given in advertising in @state. + * %MLO_AN_SGMII: + * place the link in Cisco SGMII mode - there is no advertisment + * to make as the PHY communicates the speed and duplex to the + * MAC over the in-band control word. Configuration of the pause + * mode is as per MLO_AN_PHY since this is not included. + */ + void (*mac_config)(struct net_device *ndev, unsigned int mode, + const struct phylink_link_state *state); + + /** + * mac_an_restart: restart 802.3z BaseX autonegotiation + * @ndev: net_device structure for the MAC + */ + void (*mac_an_restart)(struct net_device *ndev); + + void (*mac_link_down)(struct net_device *, unsigned int mode); + void (*mac_link_up)(struct net_device *, unsigned int mode, + struct phy_device *); +}; + +struct phylink *phylink_create(struct net_device *, struct device_node *, + phy_interface_t iface, const struct phylink_mac_ops *ops); +void phylink_destroy(struct phylink *); + +int phylink_connect_phy(struct phylink *, struct phy_device *); +int phylink_of_phy_connect(struct phylink *, struct device_node *); +void phylink_disconnect_phy(struct phylink *); + +void phylink_mac_change(struct phylink *, bool up); + +void phylink_start(struct phylink *); +void phylink_stop(struct phylink *); + +void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *); +int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *); + +int phylink_ethtool_ksettings_get(struct phylink *, + struct ethtool_link_ksettings *); +int phylink_ethtool_ksettings_set(struct phylink *, + const struct ethtool_link_ksettings *); +int phylink_ethtool_nway_reset(struct phylink *); +void phylink_ethtool_get_pauseparam(struct phylink *, + struct ethtool_pauseparam *); +int phylink_ethtool_set_pauseparam(struct phylink *, + struct ethtool_pauseparam *); +int phylink_init_eee(struct phylink *, bool); +int phylink_get_eee_err(struct phylink *); +int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); + +#define phylink_zero(bm) \ + bitmap_zero(bm, __ETHTOOL_LINK_MODE_MASK_NBITS) +#define __phylink_do_bit(op, bm, mode) \ + op(ETHTOOL_LINK_MODE_ ## mode ## _BIT, bm) + +#define phylink_set(bm, mode) __phylink_do_bit(__set_bit, bm, mode) +#define phylink_clear(bm, mode) __phylink_do_bit(__clear_bit, bm, mode) +#define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode) + +void phylink_set_port_modes(unsigned long *bits); + +#endif -- cgit From ce0aa27ff3f68ed4ea1631d33797e573b3508bfa Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:18 +0100 Subject: sfp: add sfp-bus to bridge between network devices and sfp cages Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 434 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 include/linux/sfp.h (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h new file mode 100644 index 000000000000..4a906f560817 --- /dev/null +++ b/include/linux/sfp.h @@ -0,0 +1,434 @@ +#ifndef LINUX_SFP_H +#define LINUX_SFP_H + +#include + +struct __packed sfp_eeprom_base { + u8 phys_id; + u8 phys_ext_id; + u8 connector; +#if defined __BIG_ENDIAN_BITFIELD + u8 e10g_base_er:1; + u8 e10g_base_lrm:1; + u8 e10g_base_lr:1; + u8 e10g_base_sr:1; + u8 if_1x_sx:1; + u8 if_1x_lx:1; + u8 if_1x_copper_active:1; + u8 if_1x_copper_passive:1; + + u8 escon_mmf_1310_led:1; + u8 escon_smf_1310_laser:1; + u8 sonet_oc192_short_reach:1; + u8 sonet_reach_bit1:1; + u8 sonet_reach_bit2:1; + u8 sonet_oc48_long_reach:1; + u8 sonet_oc48_intermediate_reach:1; + u8 sonet_oc48_short_reach:1; + + u8 unallocated_5_7:1; + u8 sonet_oc12_smf_long_reach:1; + u8 sonet_oc12_smf_intermediate_reach:1; + u8 sonet_oc12_short_reach:1; + u8 unallocated_5_3:1; + u8 sonet_oc3_smf_long_reach:1; + u8 sonet_oc3_smf_intermediate_reach:1; + u8 sonet_oc3_short_reach:1; + + u8 e_base_px:1; + u8 e_base_bx10:1; + u8 e100_base_fx:1; + u8 e100_base_lx:1; + u8 e1000_base_t:1; + u8 e1000_base_cx:1; + u8 e1000_base_lx:1; + u8 e1000_base_sx:1; + + u8 fc_ll_v:1; + u8 fc_ll_s:1; + u8 fc_ll_i:1; + u8 fc_ll_l:1; + u8 fc_ll_m:1; + u8 fc_tech_sa:1; + u8 fc_tech_lc:1; + u8 fc_tech_electrical_inter_enclosure:1; + + u8 fc_tech_electrical_intra_enclosure:1; + u8 fc_tech_sn:1; + u8 fc_tech_sl:1; + u8 fc_tech_ll:1; + u8 sfp_ct_active:1; + u8 sfp_ct_passive:1; + u8 unallocated_8_1:1; + u8 unallocated_8_0:1; + + u8 fc_media_tw:1; + u8 fc_media_tp:1; + u8 fc_media_mi:1; + u8 fc_media_tv:1; + u8 fc_media_m6:1; + u8 fc_media_m5:1; + u8 unallocated_9_1:1; + u8 fc_media_sm:1; + + u8 fc_speed_1200:1; + u8 fc_speed_800:1; + u8 fc_speed_1600:1; + u8 fc_speed_400:1; + u8 fc_speed_3200:1; + u8 fc_speed_200:1; + u8 unallocated_10_1:1; + u8 fc_speed_100:1; +#elif defined __LITTLE_ENDIAN_BITFIELD + u8 if_1x_copper_passive:1; + u8 if_1x_copper_active:1; + u8 if_1x_lx:1; + u8 if_1x_sx:1; + u8 e10g_base_sr:1; + u8 e10g_base_lr:1; + u8 e10g_base_lrm:1; + u8 e10g_base_er:1; + + u8 sonet_oc3_short_reach:1; + u8 sonet_oc3_smf_intermediate_reach:1; + u8 sonet_oc3_smf_long_reach:1; + u8 unallocated_5_3:1; + u8 sonet_oc12_short_reach:1; + u8 sonet_oc12_smf_intermediate_reach:1; + u8 sonet_oc12_smf_long_reach:1; + u8 unallocated_5_7:1; + + u8 sonet_oc48_short_reach:1; + u8 sonet_oc48_intermediate_reach:1; + u8 sonet_oc48_long_reach:1; + u8 sonet_reach_bit2:1; + u8 sonet_reach_bit1:1; + u8 sonet_oc192_short_reach:1; + u8 escon_smf_1310_laser:1; + u8 escon_mmf_1310_led:1; + + u8 e1000_base_sx:1; + u8 e1000_base_lx:1; + u8 e1000_base_cx:1; + u8 e1000_base_t:1; + u8 e100_base_lx:1; + u8 e100_base_fx:1; + u8 e_base_bx10:1; + u8 e_base_px:1; + + u8 fc_tech_electrical_inter_enclosure:1; + u8 fc_tech_lc:1; + u8 fc_tech_sa:1; + u8 fc_ll_m:1; + u8 fc_ll_l:1; + u8 fc_ll_i:1; + u8 fc_ll_s:1; + u8 fc_ll_v:1; + + u8 unallocated_8_0:1; + u8 unallocated_8_1:1; + u8 sfp_ct_passive:1; + u8 sfp_ct_active:1; + u8 fc_tech_ll:1; + u8 fc_tech_sl:1; + u8 fc_tech_sn:1; + u8 fc_tech_electrical_intra_enclosure:1; + + u8 fc_media_sm:1; + u8 unallocated_9_1:1; + u8 fc_media_m5:1; + u8 fc_media_m6:1; + u8 fc_media_tv:1; + u8 fc_media_mi:1; + u8 fc_media_tp:1; + u8 fc_media_tw:1; + + u8 fc_speed_100:1; + u8 unallocated_10_1:1; + u8 fc_speed_200:1; + u8 fc_speed_3200:1; + u8 fc_speed_400:1; + u8 fc_speed_1600:1; + u8 fc_speed_800:1; + u8 fc_speed_1200:1; +#else +#error Unknown Endian +#endif + u8 encoding; + u8 br_nominal; + u8 rate_id; + u8 link_len[6]; + char vendor_name[16]; + u8 extended_cc; + char vendor_oui[3]; + char vendor_pn[16]; + char vendor_rev[4]; + union { + __be16 optical_wavelength; + u8 cable_spec; + }; + u8 reserved62; + u8 cc_base; +}; + +struct __packed sfp_eeprom_ext { + __be16 options; + u8 br_max; + u8 br_min; + char vendor_sn[16]; + char datecode[8]; + u8 diagmon; + u8 enhopts; + u8 sff8472_compliance; + u8 cc_ext; +}; + +struct __packed sfp_eeprom_id { + struct sfp_eeprom_base base; + struct sfp_eeprom_ext ext; +}; + +/* SFP EEPROM registers */ +enum { + SFP_PHYS_ID = 0x00, + SFP_PHYS_EXT_ID = 0x01, + SFP_CONNECTOR = 0x02, + SFP_COMPLIANCE = 0x03, + SFP_ENCODING = 0x0b, + SFP_BR_NOMINAL = 0x0c, + SFP_RATE_ID = 0x0d, + SFP_LINK_LEN_SM_KM = 0x0e, + SFP_LINK_LEN_SM_100M = 0x0f, + SFP_LINK_LEN_50UM_OM2_10M = 0x10, + SFP_LINK_LEN_62_5UM_OM1_10M = 0x11, + SFP_LINK_LEN_COPPER_1M = 0x12, + SFP_LINK_LEN_50UM_OM4_10M = 0x12, + SFP_LINK_LEN_50UM_OM3_10M = 0x13, + SFP_VENDOR_NAME = 0x14, + SFP_VENDOR_OUI = 0x25, + SFP_VENDOR_PN = 0x28, + SFP_VENDOR_REV = 0x38, + SFP_OPTICAL_WAVELENGTH_MSB = 0x3c, + SFP_OPTICAL_WAVELENGTH_LSB = 0x3d, + SFP_CABLE_SPEC = 0x3c, + SFP_CC_BASE = 0x3f, + SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */ + SFP_BR_MAX = 0x42, + SFP_BR_MIN = 0x43, + SFP_VENDOR_SN = 0x44, + SFP_DATECODE = 0x54, + SFP_DIAGMON = 0x5c, + SFP_ENHOPTS = 0x5d, + SFP_SFF8472_COMPLIANCE = 0x5e, + SFP_CC_EXT = 0x5f, + + SFP_PHYS_ID_SFP = 0x03, + SFP_PHYS_EXT_ID_SFP = 0x04, + SFP_CONNECTOR_UNSPEC = 0x00, + /* codes 01-05 not supportable on SFP, but some modules have single SC */ + SFP_CONNECTOR_SC = 0x01, + SFP_CONNECTOR_FIBERJACK = 0x06, + SFP_CONNECTOR_LC = 0x07, + SFP_CONNECTOR_MT_RJ = 0x08, + SFP_CONNECTOR_MU = 0x09, + SFP_CONNECTOR_SG = 0x0a, + SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b, + SFP_CONNECTOR_MPO_1X12 = 0x0c, + SFP_CONNECTOR_MPO_2X16 = 0x0d, + SFP_CONNECTOR_HSSDC_II = 0x20, + SFP_CONNECTOR_COPPER_PIGTAIL = 0x21, + SFP_CONNECTOR_RJ45 = 0x22, + SFP_CONNECTOR_NOSEPARATE = 0x23, + SFP_CONNECTOR_MXC_2X16 = 0x24, + SFP_ENCODING_UNSPEC = 0x00, + SFP_ENCODING_8B10B = 0x01, + SFP_ENCODING_4B5B = 0x02, + SFP_ENCODING_NRZ = 0x03, + SFP_ENCODING_8472_MANCHESTER = 0x04, + SFP_ENCODING_8472_SONET = 0x05, + SFP_ENCODING_8472_64B66B = 0x06, + SFP_ENCODING_256B257B = 0x07, + SFP_ENCODING_PAM4 = 0x08, + SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), + SFP_OPTIONS_PAGING_A2 = BIT(12), + SFP_OPTIONS_RETIMER = BIT(11), + SFP_OPTIONS_COOLED_XCVR = BIT(10), + SFP_OPTIONS_POWER_DECL = BIT(9), + SFP_OPTIONS_RX_LINEAR_OUT = BIT(8), + SFP_OPTIONS_RX_DECISION_THRESH = BIT(7), + SFP_OPTIONS_TUNABLE_TX = BIT(6), + SFP_OPTIONS_RATE_SELECT = BIT(5), + SFP_OPTIONS_TX_DISABLE = BIT(4), + SFP_OPTIONS_TX_FAULT = BIT(3), + SFP_OPTIONS_LOS_INVERTED = BIT(2), + SFP_OPTIONS_LOS_NORMAL = BIT(1), + SFP_DIAGMON_DDM = BIT(6), + SFP_DIAGMON_INT_CAL = BIT(5), + SFP_DIAGMON_EXT_CAL = BIT(4), + SFP_DIAGMON_RXPWR_AVG = BIT(3), + SFP_DIAGMON_ADDRMODE = BIT(2), + SFP_ENHOPTS_ALARMWARN = BIT(7), + SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6), + SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5), + SFP_ENHOPTS_SOFT_RX_LOS = BIT(4), + SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3), + SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2), + SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1), + SFP_SFF8472_COMPLIANCE_NONE = 0x00, + SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01, + SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02, + SFP_SFF8472_COMPLIANCE_REV10_2 = 0x03, + SFP_SFF8472_COMPLIANCE_REV10_4 = 0x04, + SFP_SFF8472_COMPLIANCE_REV11_0 = 0x05, + SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06, + SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07, + SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08, +}; + +/* SFP Diagnostics */ +enum { + /* Alarm and warnings stored MSB at lower address then LSB */ + SFP_TEMP_HIGH_ALARM = 0x00, + SFP_TEMP_LOW_ALARM = 0x02, + SFP_TEMP_HIGH_WARN = 0x04, + SFP_TEMP_LOW_WARN = 0x06, + SFP_VOLT_HIGH_ALARM = 0x08, + SFP_VOLT_LOW_ALARM = 0x0a, + SFP_VOLT_HIGH_WARN = 0x0c, + SFP_VOLT_LOW_WARN = 0x0e, + SFP_BIAS_HIGH_ALARM = 0x10, + SFP_BIAS_LOW_ALARM = 0x12, + SFP_BIAS_HIGH_WARN = 0x14, + SFP_BIAS_LOW_WARN = 0x16, + SFP_TXPWR_HIGH_ALARM = 0x18, + SFP_TXPWR_LOW_ALARM = 0x1a, + SFP_TXPWR_HIGH_WARN = 0x1c, + SFP_TXPWR_LOW_WARN = 0x1e, + SFP_RXPWR_HIGH_ALARM = 0x20, + SFP_RXPWR_LOW_ALARM = 0x22, + SFP_RXPWR_HIGH_WARN = 0x24, + SFP_RXPWR_LOW_WARN = 0x26, + SFP_LASER_TEMP_HIGH_ALARM = 0x28, + SFP_LASER_TEMP_LOW_ALARM = 0x2a, + SFP_LASER_TEMP_HIGH_WARN = 0x2c, + SFP_LASER_TEMP_LOW_WARN = 0x2e, + SFP_TEC_CUR_HIGH_ALARM = 0x30, + SFP_TEC_CUR_LOW_ALARM = 0x32, + SFP_TEC_CUR_HIGH_WARN = 0x34, + SFP_TEC_CUR_LOW_WARN = 0x36, + SFP_CAL_RXPWR4 = 0x38, + SFP_CAL_RXPWR3 = 0x3c, + SFP_CAL_RXPWR2 = 0x40, + SFP_CAL_RXPWR1 = 0x44, + SFP_CAL_RXPWR0 = 0x48, + SFP_CAL_TXI_SLOPE = 0x4c, + SFP_CAL_TXI_OFFSET = 0x4e, + SFP_CAL_TXPWR_SLOPE = 0x50, + SFP_CAL_TXPWR_OFFSET = 0x52, + SFP_CAL_T_SLOPE = 0x54, + SFP_CAL_T_OFFSET = 0x56, + SFP_CAL_V_SLOPE = 0x58, + SFP_CAL_V_OFFSET = 0x5a, + SFP_CHKSUM = 0x5f, + + SFP_TEMP = 0x60, + SFP_VCC = 0x62, + SFP_TX_BIAS = 0x64, + SFP_TX_POWER = 0x66, + SFP_RX_POWER = 0x68, + SFP_LASER_TEMP = 0x6a, + SFP_TEC_CUR = 0x6c, + + SFP_STATUS = 0x6e, + SFP_ALARM = 0x70, + + SFP_EXT_STATUS = 0x76, + SFP_VSL = 0x78, + SFP_PAGE = 0x7f, +}; + +struct device_node; +struct ethtool_eeprom; +struct ethtool_modinfo; +struct net_device; +struct sfp_bus; + +struct sfp_upstream_ops { + int (*module_insert)(void *, const struct sfp_eeprom_id *id); + void (*module_remove)(void *); + void (*link_down)(void *); + void (*link_up)(void *); + int (*connect_phy)(void *, struct phy_device *); + void (*disconnect_phy)(void *); +}; + +#if IS_ENABLED(CONFIG_SFP) +int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, + unsigned long *support); +phy_interface_t sfp_parse_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id); +void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, + unsigned long *support); + +int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); +int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, + u8 *data); +void sfp_upstream_start(struct sfp_bus *bus); +void sfp_upstream_stop(struct sfp_bus *bus); +struct sfp_bus *sfp_register_upstream(struct device_node *np, + struct net_device *ndev, void *upstream, + const struct sfp_upstream_ops *ops); +void sfp_unregister_upstream(struct sfp_bus *bus); +#else +static inline int sfp_parse_port(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *support) +{ + return PORT_OTHER; +} + +static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id) +{ + return PHY_INTERFACE_MODE_NA; +} + +static inline void sfp_parse_support(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *support) +{ +} + +static inline int sfp_get_module_info(struct sfp_bus *bus, + struct ethtool_modinfo *modinfo) +{ + return -EOPNOTSUPP; +} + +static inline int sfp_get_module_eeprom(struct sfp_bus *bus, + struct ethtool_eeprom *ee, u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline void sfp_upstream_start(struct sfp_bus *bus) +{ +} + +static inline void sfp_upstream_stop(struct sfp_bus *bus) +{ +} + +static inline struct sfp_bus *sfp_register_upstream(struct device_node *np, + struct net_device *ndev, void *upstream, + const struct sfp_upstream_ops *ops) +{ + return (struct sfp_bus *)-1; +} + +static inline void sfp_unregister_upstream(struct sfp_bus *bus) +{ +} +#endif + +#endif -- cgit From 770a1ad55763a8a783cb71078e0b33a6b91ad92b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Jul 2017 15:03:23 +0100 Subject: phylink: add module EEPROM support Add support for reading module EEPROMs through phylink. Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phylink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 76f054f39684..af67edd4ae38 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -125,6 +125,9 @@ void phylink_ethtool_get_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); +int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *); +int phylink_ethtool_get_module_eeprom(struct phylink *, + struct ethtool_eeprom *, u8 *); int phylink_init_eee(struct phylink *, bool); int phylink_get_eee_err(struct phylink *); int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -- cgit From 4faf783998b8cb88294e9df89032f473f8771b78 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 3 Aug 2017 20:38:51 -0700 Subject: tcp: fix cwnd undo in Reno and HTCP congestion controls Using ssthresh to revert cwnd is less reliable when ssthresh is bounded to 2 packets. This patch uses an existing variable in TCP "prior_cwnd" that snapshots the cwnd right before entering fast recovery and RTO recovery in Reno. This fixes the issue discussed in netdev thread: "A buggy behavior for Linux TCP Reno and HTCP" https://www.spinics.net/lists/netdev/msg444955.html Suggested-by: Neal Cardwell Reported-by: Wei Sun Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d7389ea36e10..267164a1d559 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; - u32 prior_cwnd; /* Congestion window at start of Recovery. */ + u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ -- cgit From 233e7936c84b7d7dd90c10e2ba27abb5ab42956f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:51 +0800 Subject: sctp: remove the typedef sctp_lower_cwnd_t This patch is to remove the typedef sctp_lower_cwnd_t, and replace with enum sctp_lower_cwnd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9b18044c551e..761064ee3ac5 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -378,12 +378,12 @@ typedef enum { } sctp_retransmit_reason_t; /* Reasons to lower cwnd. */ -typedef enum { +enum sctp_lower_cwnd { SCTP_LOWER_CWND_T3_RTX, SCTP_LOWER_CWND_FAST_RTX, SCTP_LOWER_CWND_ECNE, SCTP_LOWER_CWND_INACTIVE, -} sctp_lower_cwnd_t; +}; /* SCTP-AUTH Necessary constants */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 66cd7639b912..53802d8872d7 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -950,7 +950,8 @@ int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32); -void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); +void sctp_transport_lower_cwnd(struct sctp_transport *t, + enum sctp_lower_cwnd reason); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -- cgit From 125c29820252bfa5bf8081e75618e4ee7e9487da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:52 +0800 Subject: sctp: remove the typedef sctp_retransmit_reason_t This patch is to remove the typedef sctp_retransmit_reason_t, and replace with enum sctp_retransmit_reason in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 761064ee3ac5..922fba5880d6 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -370,12 +370,12 @@ typedef enum { peer */ /* Reasons to retransmit. */ -typedef enum { +enum sctp_retransmit_reason { SCTP_RTXR_T3_RTX, SCTP_RTXR_FAST_RTX, SCTP_RTXR_PMTUD, SCTP_RTXR_T1_RTX, -} sctp_retransmit_reason_t; +}; /* Reasons to lower cwnd. */ enum sctp_lower_cwnd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 53802d8872d7..5e872acee6e3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1054,8 +1054,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); -void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, - sctp_retransmit_reason_t); +void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, + enum sctp_retransmit_reason reason); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, -- cgit From 701ef3e6c74be771a76be39817941e68e7228644 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:53 +0800 Subject: sctp: remove the typedef sctp_scope_policy_t This patch is to remove the typedef sctp_scope_policy_t and keep it's members as an anonymous enum. It is also to define SCTP_SCOPE_POLICY_MAX to replace the num 3 in sysctl.c to make codes clear. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 922fba5880d6..acb03eb64842 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -341,12 +341,14 @@ typedef enum { SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ } sctp_scope_t; -typedef enum { +enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ SCTP_SCOPE_POLICY_ENABLE, /* Enable IPv4 address scoping */ SCTP_SCOPE_POLICY_PRIVATE, /* Follow draft but allow IPv4 private addresses */ SCTP_SCOPE_POLICY_LINK, /* Follow draft but allow IPv4 link local addresses */ -} sctp_scope_policy_t; +}; + +#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK /* Based on IPv4 scoping , * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, -- cgit From 1c662018d2d41ecc5550cbd74d29d2d32c164ed3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:54 +0800 Subject: sctp: remove the typedef sctp_scope_t This patch is to remove the typedef sctp_scope_t, and replace with enum sctp_scope in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index acb03eb64842..0503bb70e5d9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -333,13 +333,13 @@ typedef enum { * At this point, the IPv6 scopes will be mapped to these internal scopes * as much as possible. */ -typedef enum { +enum sctp_scope { SCTP_SCOPE_GLOBAL, /* IPv4 global addresses */ SCTP_SCOPE_PRIVATE, /* IPv4 private addresses */ SCTP_SCOPE_LINK, /* IPv4 link local address */ SCTP_SCOPE_LOOPBACK, /* IPv4 loopback address */ SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */ -} sctp_scope_t; +}; enum { SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 84650fed1e6a..ca66b033ec38 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -94,8 +94,8 @@ /* * sctp/protocol.c */ -int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *, - sctp_scope_t, gfp_t gfp, int flags); +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, + enum sctp_scope, gfp_t gfp, int flags); struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5e872acee6e3..d771d418481f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -449,7 +449,7 @@ struct sctp_af { int (*addr_valid) (union sctp_addr *, struct sctp_sock *, const struct sk_buff *); - sctp_scope_t (*scope) (union sctp_addr *); + enum sctp_scope (*scope)(union sctp_addr *); void (*inaddr_any) (union sctp_addr *, __be16); int (*is_any) (const union sctp_addr *); int (*available) (union sctp_addr *, @@ -1111,7 +1111,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, gfp_t gfp, + enum sctp_scope scope, gfp_t gfp, int flags); int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, @@ -1135,8 +1135,9 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, __u16 port, gfp_t gfp); -sctp_scope_t sctp_scope(const union sctp_addr *); -int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope); +enum sctp_scope sctp_scope(const union sctp_addr *addr); +int sctp_in_scope(struct net *net, const union sctp_addr *addr, + const enum sctp_scope scope); int sctp_is_any(struct sock *sk, const union sctp_addr *addr); int sctp_is_ep_boundall(struct sock *sk); @@ -1925,8 +1926,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base) struct sctp_association * -sctp_association_new(const struct sctp_endpoint *, const struct sock *, - sctp_scope_t scope, gfp_t gfp); +sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk, + enum sctp_scope scope, gfp_t gfp); void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); @@ -1967,8 +1968,8 @@ void sctp_assoc_set_primary(struct sctp_association *, struct sctp_transport *); void sctp_assoc_del_nonprimary_peers(struct sctp_association *, struct sctp_transport *); -int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *, - sctp_scope_t, gfp_t); +int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, + enum sctp_scope scope, gfp_t gfp); int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *, struct sctp_cookie*, gfp_t gfp); -- cgit From 0ceaeebe28d46e49c865f88a3e3ca75f6cf13e1f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:55 +0800 Subject: sctp: remove the typedef sctp_transport_cmd_t This patch is to remove the typedef sctp_transport_cmd_t, and replace with enum sctp_transport_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 0503bb70e5d9..8ce6d3263e41 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -320,11 +320,11 @@ typedef enum { } sctp_xmit_t; /* These are the commands for manipulating transports. */ -typedef enum { +enum sctp_transport_cmd { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, SCTP_TRANSPORT_PF, -} sctp_transport_cmd_t; +}; /* These are the address scopes defined mainly for IPv4 addresses * based on draft of SCTP IPv4 scoping . diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d771d418481f..d098d1c4ed74 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1947,9 +1947,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); void sctp_assoc_rm_peer(struct sctp_association *asoc, struct sctp_transport *peer); -void sctp_assoc_control_transport(struct sctp_association *, - struct sctp_transport *, - sctp_transport_cmd_t, sctp_sn_error_t); +void sctp_assoc_control_transport(struct sctp_association *asoc, + struct sctp_transport *transport, + enum sctp_transport_cmd command, + sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, struct net *, -- cgit From 8496561430df6e2c21b4ed37bd93604d3acaf5d6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:56 +0800 Subject: sctp: remove the typedef sctp_sock_state_t This patch is to remove the typedef sctp_sock_state_t, and replace with enum sctp_sock_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 +++--- include/net/sctp/sctp.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8ce6d3263e41..049868ea7ae9 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -42,7 +42,7 @@ #include #include /* For ipv6hdr. */ -#include /* For TCP states used in sctp_sock_state_t */ +#include /* For TCP states used in enum sctp_sock_state */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; @@ -214,13 +214,13 @@ typedef enum { * - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single * association. */ -typedef enum { +enum sctp_sock_state { SCTP_SS_CLOSED = TCP_CLOSE, SCTP_SS_LISTENING = TCP_LISTEN, SCTP_SS_ESTABLISHING = TCP_SYN_SENT, SCTP_SS_ESTABLISHED = TCP_ESTABLISHED, SCTP_SS_CLOSING = TCP_CLOSE_WAIT, -} sctp_sock_state_t; +}; /* These functions map various type to printable names. */ const char *sctp_cname(const sctp_subtype_t); /* chunk types */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ca66b033ec38..0022bc713434 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -565,7 +565,8 @@ static inline int __sctp_state(const struct sctp_association *asoc, /* Is the socket in this state? */ #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state)) -static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state) +static inline int __sctp_sstate(const struct sock *sk, + enum sctp_sock_state state) { return sk->sk_state == state; } -- cgit From 86b36f2a9b9ea58dd2100b0e6f1f45a1f67ee95e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:57 +0800 Subject: sctp: remove the typedef sctp_xmit_t This patch is to remove the typedef sctp_xmit_t, and replace with enum sctp_xmit in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/structs.h | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 049868ea7ae9..311b2e3773e8 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,12 +312,12 @@ enum { SCTP_MAX_GABS = 16 }; /* These return values describe the success or failure of a number of * routines which form the lower interface to SCTP_outqueue. */ -typedef enum { +enum sctp_xmit { SCTP_XMIT_OK, SCTP_XMIT_PMTU_FULL, SCTP_XMIT_RWND_FULL, SCTP_XMIT_DELAY, -} sctp_xmit_t; +}; /* These are the commands for manipulating transports. */ enum sctp_transport_cmd { diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d098d1c4ed74..6fab67e7f1e5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -697,10 +697,11 @@ struct sctp_packet { void sctp_packet_init(struct sctp_packet *, struct sctp_transport *, __u16 sport, __u16 dport); void sctp_packet_config(struct sctp_packet *, __u32 vtag, int); -sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *, - struct sctp_chunk *, int, gfp_t); -sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *, - struct sctp_chunk *); +enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk, + int one_packet, gfp_t gfp); +enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk); int sctp_packet_transmit(struct sctp_packet *, gfp_t); void sctp_packet_free(struct sctp_packet *); -- cgit From 4785c7ae1848244da3435ba5269f6288c15975c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:58 +0800 Subject: sctp: remove the typedef sctp_ierror_t This patch is to remove the typedef sctp_ierror_t, and replace with enum sctp_ierror in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 311b2e3773e8..9a694653b708 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -155,8 +155,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) - sizeof(struct sctp_data_chunk))) /* Internal error codes */ -typedef enum { - +enum sctp_ierror { SCTP_IERROR_NO_ERROR = 0, SCTP_IERROR_BASE = 1000, SCTP_IERROR_NO_COOKIE, @@ -177,7 +176,7 @@ typedef enum { SCTP_IERROR_PROTO_VIOLATION, SCTP_IERROR_ERROR, SCTP_IERROR_ABORT, -} sctp_ierror_t; +}; -- cgit From 5210601945f5aedaf2d7f13a88436e27a39c6a8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 19:59:59 +0800 Subject: sctp: remove the typedef sctp_state_t This patch is to remove the typedef sctp_state_t, and replace with enum sctp_state in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 2 +- include/net/sctp/sm.h | 10 +++++----- include/net/sctp/structs.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 1d5f6ff3f440..be12ec946628 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -126,7 +126,7 @@ typedef union { __u8 u8; int error; __be16 err; - sctp_state_t state; + enum sctp_state state; sctp_event_timeout_t to; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -167,7 +167,7 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ -SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) +SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 9a694653b708..db9f40b657ba 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -181,7 +181,7 @@ enum sctp_ierror { /* SCTP state defines for internal state machine */ -typedef enum { +enum sctp_state { SCTP_STATE_CLOSED = 0, SCTP_STATE_COOKIE_WAIT = 1, @@ -192,7 +192,7 @@ typedef enum { SCTP_STATE_SHUTDOWN_RECEIVED = 6, SCTP_STATE_SHUTDOWN_ACK_SENT = 7, -} sctp_state_t; +}; #define SCTP_STATE_MAX SCTP_STATE_SHUTDOWN_ACK_SENT #define SCTP_STATE_NUM_STATES (SCTP_STATE_MAX + 1) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 0022bc713434..24ff7931d38c 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -558,7 +558,7 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) /* Is the association in this state? */ #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state)) static inline int __sctp_state(const struct sctp_association *asoc, - sctp_state_t state) + enum sctp_state state) { return asoc->state == state; } diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 860f378333b5..281e8d1e0752 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -175,10 +175,10 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *, - sctp_event_t, - sctp_state_t, - sctp_subtype_t); +const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, + sctp_event_t event_type, + enum sctp_state state, + sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +313,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, + enum sctp_state state, struct sctp_endpoint *, struct sctp_association *asoc, void *event_arg, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6fab67e7f1e5..fbe6e81b889b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1577,7 +1577,7 @@ struct sctp_association { * * State takes values from SCTP_STATE_*. */ - sctp_state_t state; + enum sctp_state state; /* Overall : The overall association error count. * Error Count : [Clear this any time I get something.] -- cgit From dc1e0e6eb8b2e2c6b5c6cdfa4f0cc789e9516de5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:00 +0800 Subject: sctp: remove the typedef sctp_event_primitive_t This patch is to remove the typedef sctp_event_primitive_t, and replace with enum sctp_event_primitive in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index db9f40b657ba..162ee954a6af 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -110,7 +110,7 @@ typedef enum { #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) /* These are primitive requests from the ULP. */ -typedef enum { +enum sctp_event_primitive { SCTP_PRIMITIVE_ASSOCIATE = 0, SCTP_PRIMITIVE_SHUTDOWN, SCTP_PRIMITIVE_ABORT, @@ -118,7 +118,7 @@ typedef enum { SCTP_PRIMITIVE_REQUESTHEARTBEAT, SCTP_PRIMITIVE_ASCONF, SCTP_PRIMITIVE_RECONF, -} sctp_event_primitive_t; +}; #define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF #define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1) @@ -133,7 +133,7 @@ typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; sctp_event_other_t other; - sctp_event_primitive_t primitive; + enum sctp_event_primitive primitive; } sctp_subtype_t; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ @@ -144,7 +144,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) -SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) +SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA) -- cgit From a0f098d0385a40f9c4c8a2ce48d075f77ea7edd8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:01 +0800 Subject: sctp: remove the typedef sctp_event_other_t This patch is to remove the typedef sctp_event_other_t, and replace with enum sctp_event_other in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 162ee954a6af..fd8a80ec573f 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -101,10 +101,10 @@ typedef enum { #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) -typedef enum { +enum sctp_event_other { SCTP_EVENT_NO_PENDING_TSN = 0, SCTP_EVENT_ICMP_PROTO_UNREACH, -} sctp_event_other_t; +}; #define SCTP_EVENT_OTHER_MAX SCTP_EVENT_ICMP_PROTO_UNREACH #define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1) @@ -132,7 +132,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; sctp_event_timeout_t timeout; - sctp_event_other_t other; + enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -143,7 +143,7 @@ SCTP_ST_## _name (_type _arg) \ SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) -SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other) +SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit From 19cd1592a24754e16d48398812d5f69b63f674dd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:02 +0800 Subject: sctp: remove the typedef sctp_event_timeout_t This patch is to remove the typedef sctp_event_timeout_t, and replace with enum sctp_event_timeout in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 4 ++-- include/net/sctp/constants.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index be12ec946628..376cb78b6247 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -127,7 +127,7 @@ typedef union { int error; __be16 err; enum sctp_state state; - sctp_event_timeout_t to; + enum sctp_event_timeout to; struct sctp_chunk *chunk; struct sctp_association *asoc; struct sctp_transport *transport; @@ -168,7 +168,7 @@ SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state) -SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) +SCTP_ARG_CONSTRUCTOR(TO, enum sctp_event_timeout, to) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fd8a80ec573f..fb931f07e83a 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -84,7 +84,7 @@ typedef enum { * SCTP_ULP_* to the list of possible chunks. */ -typedef enum { +enum sctp_event_timeout { SCTP_EVENT_TIMEOUT_NONE = 0, SCTP_EVENT_TIMEOUT_T1_COOKIE, SCTP_EVENT_TIMEOUT_T1_INIT, @@ -96,7 +96,7 @@ typedef enum { SCTP_EVENT_TIMEOUT_RECONF, SCTP_EVENT_TIMEOUT_SACK, SCTP_EVENT_TIMEOUT_AUTOCLOSE, -} sctp_event_timeout_t; +}; #define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE #define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1) @@ -131,7 +131,7 @@ enum sctp_event_primitive { typedef union { enum sctp_cid chunk; - sctp_event_timeout_t timeout; + enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; } sctp_subtype_t; @@ -142,7 +142,7 @@ SCTP_ST_## _name (_type _arg) \ { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) -SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout) +SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other) SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive) -- cgit From 61f0eb072294a148f707335d4d7f858b2af73770 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:03 +0800 Subject: sctp: remove the typedef sctp_event_t This patch is to remove the typedef sctp_event_t, and replace with enum sctp_event in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 6 ++---- include/net/sctp/sm.h | 12 +++++------- 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index fb931f07e83a..3181e0f95b60 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -71,14 +71,12 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; SCTP_NUM_AUTH_CHUNK_TYPES) /* These are the different flavours of event. */ -typedef enum { - +enum sctp_event { SCTP_EVENT_T_CHUNK = 1, SCTP_EVENT_T_TIMEOUT, SCTP_EVENT_T_OTHER, SCTP_EVENT_T_PRIMITIVE - -} sctp_event_t; +}; /* As a convenience for the state machine, we append SCTP_EVENT_* and * SCTP_ULP_* to the list of possible chunks. diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 281e8d1e0752..96f54cff7964 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -176,7 +176,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - sctp_event_t event_type, + enum sctp_event event_type, enum sctp_state state, sctp_subtype_t event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); @@ -312,12 +312,10 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ -int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - enum sctp_state state, - struct sctp_endpoint *, - struct sctp_association *asoc, - void *event_arg, - gfp_t gfp); +int sctp_do_sm(struct net *net, enum sctp_event event_type, + sctp_subtype_t subtype, enum sctp_state state, + struct sctp_endpoint *ep, struct sctp_association *asoc, + void *event_arg, gfp_t gfp); /* 2nd level prototypes */ void sctp_generate_t3_rtx_event(unsigned long peer); -- cgit From bfc6f8270fefb323662d1d7713f940149f27b7f1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 Aug 2017 20:00:04 +0800 Subject: sctp: remove the typedef sctp_subtype_t This patch is to remove the typedef sctp_subtype_t, and replace with union sctp_subtype in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 18 +++++++++--------- include/net/sctp/sm.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 3181e0f95b60..deaafa9b09cb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -124,20 +124,20 @@ enum sctp_event_primitive { /* We define here a utility type for manipulating subtypes. * The subtype constructors all work like this: * - * sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT); + * union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT); */ -typedef union { +union sctp_subtype { enum sctp_cid chunk; enum sctp_event_timeout timeout; enum sctp_event_other other; enum sctp_event_primitive primitive; -} sctp_subtype_t; +}; #define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \ -static inline sctp_subtype_t \ +static inline union sctp_subtype \ SCTP_ST_## _name (_type _arg) \ -{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; } +{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; } SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk) SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout) @@ -220,10 +220,10 @@ enum sctp_sock_state { }; /* These functions map various type to printable names. */ -const char *sctp_cname(const sctp_subtype_t); /* chunk types */ -const char *sctp_oname(const sctp_subtype_t); /* other events */ -const char *sctp_tname(const sctp_subtype_t); /* timeouts */ -const char *sctp_pname(const sctp_subtype_t); /* primitives */ +const char *sctp_cname(const union sctp_subtype id); /* chunk types */ +const char *sctp_oname(const union sctp_subtype id); /* other events */ +const char *sctp_tname(const union sctp_subtype id); /* timeouts */ +const char *sctp_pname(const union sctp_subtype id); /* primitives */ /* This is a table of printable names of sctp_state_t's. */ extern const char *const sctp_state_tbl[]; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 96f54cff7964..1e7651c3b158 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -73,7 +73,7 @@ typedef struct { typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, sctp_cmd_seq_t *); typedef void (sctp_timer_event_t) (unsigned long); @@ -175,10 +175,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - enum sctp_event event_type, - enum sctp_state state, - sctp_subtype_t event_subtype); +const sctp_sm_table_entry_t *sctp_sm_lookup_event( + struct net *net, + enum sctp_event event_type, + enum sctp_state state, + union sctp_subtype event_subtype); int sctp_chunk_iif(const struct sctp_chunk *); struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *, struct sctp_chunk *, @@ -313,7 +314,7 @@ struct sctp_chunk *sctp_process_strreset_resp( /* Prototypes for statetable processing. */ int sctp_do_sm(struct net *net, enum sctp_event event_type, - sctp_subtype_t subtype, enum sctp_state state, + union sctp_subtype subtype, enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, gfp_t gfp); -- cgit From eeb66cdb682678bfd1f02a4547e3649b38ffea7e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 4 Jun 2017 23:11:55 +0300 Subject: net/mlx5: Separate between E-Switch and MPFS Multi-Physical Function Switch (MPFs) is required for when multi-PF configuration is enabled to allow passing user configured unicast MAC addresses to the requesting PF. Before this patch eswitch.c used to manage the HW MPFS l2 table, E-Switch always (regardless of sriov) enabled vport(0) (NIC PF) vport's contexts update on unicast mac address list changes, to populate the PF's MPFS L2 table accordingly. In downstream patch we would like to allow compiling the driver without E-Switch functionalities, for that we move MPFS l2 table logic out of eswitch.c into its own file, and provide Kconfig flag (MLX5_MPFS) to allow compiling out MPFS for those who don't want Multi-PF support. NIC PF netdevice will now directly update MPFS l2 table via the new MPFS API. VF netdevice has no access to MPFS L2 table, so E-Switch will remain responsible of updating its MPFS l2 table on behalf of its VFs. Due to this change we also don't require enabling vport(0) (PF vport) unicast mac changes events anymore, for when SRIOV is not enabled. Which means E-Switch is now activated only on SRIOV activation, and not required otherwise. Signed-off-by: Saeed Mahameed Cc: Jes Sorensen Cc: kernel-team@fb.com --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..88d6eb5b3a76 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,6 +550,7 @@ struct mlx5_fc_stats { unsigned long sampling_interval; /* jiffies */ }; +struct mlx5_mpfs; struct mlx5_eswitch; struct mlx5_lag; struct mlx5_pagefault; @@ -647,6 +648,7 @@ struct mlx5_priv { spinlock_t ctx_lock; struct mlx5_flow_steering *steering; + struct mlx5_mpfs *mpfs; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; struct mlx5_lag *lag; -- cgit From 97834eba7c194659a72c5bb0f8c19c7055bb69ea Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 7 Jun 2017 12:14:24 +0300 Subject: net/mlx5: Delay events till ib registration ends When mlx5_ib registers itself to mlx5_core as an interface, it will call mlx5_add_device which will call mlx5_ib interface add callback, in case the latter successfully returns, only then mlx5_core will add it to the interface list and async events will be forwarded to mlx5_ib. Between mlx5_ib interface add callback and mlx5_core adding the mlx5_ib interface to its devices list, arriving mlx5_core events can be missed by the new mlx5_ib registering interface. In other words: thread 1: mlx5_ib: mlx5_register_interface(dev) thread 1: mlx5_core: mlx5_add_device(dev) thread 1: mlx5_core: ctx = dev->add => (mlx5_ib)->mlx5_ib_add thread 2: mlx5_core_event: **new event arrives, forward to dev_list thread 1: mlx5_core: add_ctx_to_dev_list(ctx) /* previous event was missed by the new interface.*/ It is ok to miss events before dev->add (mlx5_ib)->mlx5_ib_add_device but not after. We fix this race by accumulating the events that come between the ib_register_device (inside mlx5_add_device->(dev->add)) till the adding to the list completes and fire them to the new registering interface after that. Fixes: f1ee87fe55c8 ("net/mlx5: Organize device list API in one place") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 88d6eb5b3a76..d26f18b39c4a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -647,6 +647,9 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct list_head waiting_events_list; + bool is_accum_events; + struct mlx5_flow_steering *steering; struct mlx5_mpfs *mpfs; struct mlx5_eswitch *eswitch; -- cgit From 61690e09c3b4a40401bdaa89ee11522cc0dc4b11 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Mon, 10 Jul 2017 14:35:10 +0300 Subject: net/mlx5: Fix counter list hardware structure The counter list hardware structure doesn't contain a clear and num_of_counters fields, remove them. These wrong fields were never used by the driver hence no other driver changes. Fixes: a351a1b03bf1 ("net/mlx5: Introduce bulk reading of flow counters") Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3030121b4746..f847a3a57913 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 clear[0x1]; - u8 num_of_counters[0xf]; + u8 reserved_at_0[0x10]; u8 flow_counter_id[0x10]; u8 reserved_at_20[0x20]; -- cgit From a8ffcc741acb3c7f3dcf4c7d001209aa0995a5f1 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Sun, 9 Jul 2017 13:39:30 +0300 Subject: net/mlx5: Increase the maximum flow counters supported Read new NIC capability field which represnts 16 MSBs of the max flow counters number supported (max_flow_counter_31_16). Backward compatibility with older firmware is preserved, the modified driver reads max_flow_counter_31_16 as 0 from the older firmware and uses up to 64K counters. Changed flow counter id from 16 bits to 32 bits. Backward compatibility with older firmware is preserved as we kept the 16 LSBs of the counter id in place and added 16 MSBs from reserved field. Changed the background bulk reading of flow counters to work in chunks of at most 32K counters, to make sure we don't attempt to allocate very large buffers. Signed-off-by: Rabie Loulou Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f847a3a57913..c99daffc3c3c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -963,7 +963,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_2a0[0x10]; u8 max_wqe_sz_rq[0x10]; - u8 reserved_at_2c0[0x10]; + u8 max_flow_counter_31_16[0x10]; u8 max_wqe_sz_sq_dc[0x10]; u8 reserved_at_2e0[0x7]; @@ -981,7 +981,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_340[0x8]; u8 log_max_flow_counter_bulk[0x8]; - u8 max_flow_counter[0x10]; + u8 max_flow_counter_15_0[0x10]; u8 reserved_at_360[0x3]; @@ -1071,8 +1071,7 @@ struct mlx5_ifc_dest_format_struct_bits { }; struct mlx5_ifc_flow_counter_list_bits { - u8 reserved_at_0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_20[0x20]; }; @@ -4402,8 +4401,7 @@ struct mlx5_ifc_query_flow_counter_in_bits { u8 reserved_at_c1[0xf]; u8 num_of_counters[0x10]; - u8 reserved_at_e0[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; }; struct mlx5_ifc_query_esw_vport_context_out_bits { @@ -6271,8 +6269,7 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_60[0x20]; }; @@ -7097,8 +7094,7 @@ struct mlx5_ifc_alloc_flow_counter_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x10]; - u8 flow_counter_id[0x10]; + u8 flow_counter_id[0x20]; u8 reserved_at_60[0x20]; }; -- cgit From c04ac679c6b86e4e36fbb675c6c061b4091f5810 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 7 Aug 2017 11:29:48 +0100 Subject: ACPI: Introduce DMA ranges parsing Some devices have limited addressing capabilities and cannot reference the whole memory address space while carrying out DMA operations (eg some devices with bus address bits range smaller than system bus - which prevents them from using bus addresses that are otherwise valid for the system). The ACPI _DMA object allows bus devices to define the DMA window that is actually addressable by devices that sit upstream the bus, therefore providing a means to parse and initialize the devices DMA masks and addressable DMA range size. By relying on the generic ACPI kernel layer to retrieve and parse resources, introduce ACPI core code to parse the _DMA object. Signed-off-by: Lorenzo Pieralisi Tested-by: Nate Watterson Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 2 ++ include/linux/acpi.h | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 68bc6be447fd..07eb963b5026 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -578,6 +578,8 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); +int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, + u64 *size); int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); void acpi_dma_deconfigure(struct device *dev); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..a5eaff9f2c6d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -427,6 +427,8 @@ void acpi_dev_free_resource_list(struct list_head *list); int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, int (*preproc)(struct acpi_resource *, void *), void *preproc_data); +int acpi_dev_get_dma_resources(struct acpi_device *adev, + struct list_head *list); int acpi_dev_filter_resource_type(struct acpi_resource *ares, unsigned long types); @@ -774,6 +776,12 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } +static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, + u64 *offset, u64 *size) +{ + return -ENODEV; +} + static inline int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { -- cgit From 7ad4263980826e8b02e121af22f4f4c9103fe86d Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 7 Aug 2017 11:29:49 +0100 Subject: ACPI: Make acpi_dma_configure() DMA regions aware Current ACPI DMA configuration set-up device DMA capabilities through kernel defaults that do not take into account platform specific DMA configurations reported by firmware. By leveraging the ACPI acpi_dev_get_dma_resources() API, add code in acpi_dma_configure() to retrieve the DMA regions to correctly set-up PCI devices DMA parameters. Rework the ACPI IORT kernel API to make sure they can accommodate the DMA set-up required by firmware. By making PCI devices DMA set-up ACPI IORT specific, the kernel is shielded from unwanted regressions that could be triggered by parsing DMA resources on arches that were previously ignoring them (ie x86/ia64), leaving kernel behaviour unchanged on those arches. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Tested-by: Nate Watterson Signed-off-by: Rafael J. Wysocki --- include/linux/acpi_iort.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 8379d406ad2e..8d3f0bf80379 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -36,7 +36,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ -void iort_set_dma_mask(struct device *dev); +void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); const struct iommu_ops *iort_iommu_configure(struct device *dev); #else static inline void acpi_iort_init(void) { } @@ -47,7 +47,8 @@ static inline struct irq_domain *iort_get_device_domain(struct device *dev, { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ -static inline void iort_set_dma_mask(struct device *dev) { } +static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, + u64 *size) { } static inline const struct iommu_ops *iort_iommu_configure(struct device *dev) { return NULL; } -- cgit From 5e2f30b756a37bd80c5b0471d0e10d769ab2eb9a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 3 Aug 2017 18:11:04 +0200 Subject: KVM: nVMX: get rid of nested_get_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nested_get_page() just sounds confusing. All we want is a page from G1. This is even unrelated to nested. Let's introduce kvm_vcpu_gpa_to_page() so we don't get too lengthy lines. Reviewed-by: Paolo Bonzini Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář [Squash pasto fix from Wanpeng Li. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 21a6fd6c44af..28112d7917c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -985,6 +985,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } +static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu, + gpa_t gpa) +{ + return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa)); +} + static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); -- cgit From dfc8e9165cfe0b50438f51df5a82ab1dcc08baf4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Aug 2017 17:22:14 +0200 Subject: fbcon: mark dummy functions 'inline' The newly introduced fb_console_init/exit declarations have a dummy alternative that is marked 'static' in the header but not inline, leading to a warning whenever the header gets included: In file included from drivers/video/fbdev/core/fbmem.c:35:0: include/linux/fbcon.h:9:13: error: 'fb_console_exit' defined but not used [-Werror=unused-function] This adds the intended 'inline'. Fixes: 6104c37094e7 ("fbcon: Make fbcon a built-time depency for fbdev") Signed-off-by: Arnd Bergmann Reviewed-by: Daniel Vetter Cc: Sean Paul Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/fbcon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 0fac6305d51c..f68a7db14165 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -5,8 +5,8 @@ void __init fb_console_init(void); void __exit fb_console_exit(void); #else -static void fb_console_init(void) {} -static void fb_console_exit(void) {} +static inline void fb_console_init(void) {} +static inline void fb_console_exit(void) {} #endif #endif /* _LINUX_FBCON_H */ -- cgit From 353c64dd19ebc99786c3578ea0444e1b5b1bb172 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 7 Aug 2017 09:08:52 -0300 Subject: ASoC: soc-core: Remove unneeded dentry member from snd_soc_codec There is no need to have the *debugfs_reg dentry member as part of the snd_soc_codec structure as its only usage is inside soc_init_codec_debugfs(). Use a local dentry variable instead. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 10b71beb9f13..aad3f14966f1 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -903,10 +903,6 @@ struct snd_soc_codec { /* component */ struct snd_soc_component component; - -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs_reg; -#endif }; /* codec driver */ -- cgit From cf9e829eaf0195e722bc3a3929002a3fbe74dafa Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 7 Aug 2017 02:06:23 +0000 Subject: ASoC: soc-core: rename "cmpnt" to "component" To unify notation, to readable. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9c94b97c17f8..de57a8ad057b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -469,10 +469,10 @@ int snd_soc_register_codec(struct device *dev, struct snd_soc_dai_driver *dai_drv, int num_dai); void snd_soc_unregister_codec(struct device *dev); int snd_soc_register_component(struct device *dev, - const struct snd_soc_component_driver *cmpnt_drv, + const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); int devm_snd_soc_register_component(struct device *dev, - const struct snd_soc_component_driver *cmpnt_drv, + const struct snd_soc_component_driver *component_driver, struct snd_soc_dai_driver *dai_drv, int num_dai); void snd_soc_unregister_component(struct device *dev); int snd_soc_cache_init(struct snd_soc_codec *codec); -- cgit From 2606706e4d7b89ebf13f35895a7dfe00e394e782 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 30 May 2017 13:48:22 +0200 Subject: backlight: gpio_backlight: Delete pdata inversion The option to invert the output of the GPIO (active low) is not used by the only platform still using platform data to set up a GPIO backlight (one SH board). Delete the option as we do not expect to expand the use of board files for this driver, and GPIO descriptors intrinsically keep track of any signal inversion. Signed-off-by: Linus Walleij Acked-by: Daniel Thompson Reviewed-by: Laurent Pinchart Signed-off-by: Lee Jones --- include/linux/platform_data/gpio_backlight.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/gpio_backlight.h b/include/linux/platform_data/gpio_backlight.h index 5ae0d9c80d4d..683d90453c41 100644 --- a/include/linux/platform_data/gpio_backlight.h +++ b/include/linux/platform_data/gpio_backlight.h @@ -14,7 +14,6 @@ struct gpio_backlight_platform_data { struct device *fbdev; int gpio; int def_value; - bool active_low; const char *name; }; -- cgit From f02a60924c221985fb8b634734d6610706fa779a Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 6 Aug 2017 18:44:07 +0200 Subject: uapi linux/dlm_netlink.h: include linux/dlmconstants.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes userspace compilation error: error: ‘DLM_RESNAME_MAXLEN’ undeclared here (not in a function) char resource_name[DLM_RESNAME_MAXLEN]; Signed-off-by: Mikko Rapeli Signed-off-by: David Teigland --- include/uapi/linux/dlm_netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/dlm_netlink.h b/include/uapi/linux/dlm_netlink.h index 647c8ef27227..ef1e2e08769a 100644 --- a/include/uapi/linux/dlm_netlink.h +++ b/include/uapi/linux/dlm_netlink.h @@ -10,6 +10,7 @@ #define _DLM_NETLINK_H #include +#include enum { DLM_STATUS_WAITING = 1, -- cgit From 2572ac53c46f58e500b9d8d0f99785666038c590 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:17 +0200 Subject: net: sched: make type an argument for ndo_setup_tc Since the type is always present, push it to be a separate argument to ndo_setup_tc. On the way, name the type enum and use it for arg type. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a3cdc1b1f31..e4238e540544 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -774,7 +774,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, /* These structures hold the attributes of qdisc and classifiers * that are being passed to the netdevice through the setup_tc op. */ -enum { +enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, @@ -785,7 +785,6 @@ enum { struct tc_cls_u32_offload; struct tc_to_netdev { - unsigned int type; union { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; @@ -978,8 +977,9 @@ struct xfrmdev_ops { * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); - * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index, - * __be16 protocol, struct tc_to_netdev *tc); + * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, + * u32 handle, u32 chain_index, __be16 protocol, + * struct tc_to_netdev *tc); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue @@ -1227,6 +1227,7 @@ struct net_device_ops { struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, + enum tc_setup_type type, u32 handle, u32 chain_index, __be16 protocol, struct tc_to_netdev *tc); -- cgit From ade9b6588420b335851951702ab975c975b0c1b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:18 +0200 Subject: net: sched: rename TC_SETUP_MATCHALL to TC_SETUP_CLSMATCHALL In order to be aligned with the rest of the types, rename TC_SETUP_MATCHALL to TC_SETUP_CLSMATCHALL. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e4238e540544..f8051a36f900 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -778,7 +778,7 @@ enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, - TC_SETUP_MATCHALL, + TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, }; -- cgit From 3e0e82664322290a59189f7c2bcb39b0de932505 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:19 +0200 Subject: net: sched: make egress_dev flag part of flower offload struct Since this is specific to flower now, make it part of the flower offload struct. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - include/net/pkt_cls.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8051a36f900..bd49dbaee84e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -792,7 +792,6 @@ struct tc_to_netdev { struct tc_cls_bpf_offload *cls_bpf; struct tc_mqprio_qopt *mqprio; }; - bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e0c54f111467..8213acdfdf5a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -504,6 +504,7 @@ struct tc_cls_flower_offload { struct fl_flow_key *mask; struct fl_flow_key *key; struct tcf_exts *exts; + bool egress_dev; }; enum tc_matchall_command { -- cgit From 5fd9fc4e207dba0c05cafe78417952b4c4ca02dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:29 +0200 Subject: net: sched: push cls related args into cls_common structure As ndo_setup_tc is generic offload op for whole tc subsystem, does not really make sense to have cls-specific args. So move them under cls_common structurure which is embedded in all cls structs. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- include/net/pkt_cls.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bd49dbaee84e..6e2f7e38cf8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -977,7 +977,6 @@ struct xfrmdev_ops { * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - * u32 handle, u32 chain_index, __be16 protocol, * struct tc_to_netdev *tc); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif @@ -1227,8 +1226,6 @@ struct net_device_ops { int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - u32 handle, u32 chain_index, - __be16 protocol, struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8213acdfdf5a..ffaddf72108e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -405,6 +405,21 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } #endif /* CONFIG_NET_CLS_IND */ +struct tc_cls_common_offload { + u32 handle; + u32 chain_index; + __be16 protocol; +}; + +static inline void +tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, + const struct tcf_proto *tp) +{ + cls_common->handle = tp->q->handle; + cls_common->chain_index = tp->chain->index; + cls_common->protocol = tp->protocol; +} + struct tc_cls_u32_knode { struct tcf_exts *exts; struct tc_u32_sel *sel; @@ -431,6 +446,7 @@ enum tc_clsu32_command { }; struct tc_cls_u32_offload { + struct tc_cls_common_offload common; /* knode values */ enum tc_clsu32_command command; union { @@ -497,6 +513,7 @@ enum tc_fl_command { }; struct tc_cls_flower_offload { + struct tc_cls_common_offload common; enum tc_fl_command command; u32 prio; unsigned long cookie; @@ -513,6 +530,7 @@ enum tc_matchall_command { }; struct tc_cls_matchall_offload { + struct tc_cls_common_offload common; enum tc_matchall_command command; struct tcf_exts *exts; unsigned long cookie; @@ -526,6 +544,7 @@ enum tc_clsbpf_command { }; struct tc_cls_bpf_offload { + struct tc_cls_common_offload common; enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; -- cgit From d7c1c8d2e53be974b5c72e31d7d35f6d9737fe84 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:30 +0200 Subject: net: sched: move prio into cls_common prio is not cls_flower specific, but it is meaningful for all classifiers. Seems that only mlxsw cares about the value. Obviously, cls offload in other drivers is broken. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index ffaddf72108e..572083af02ac 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -409,6 +409,7 @@ struct tc_cls_common_offload { u32 handle; u32 chain_index; __be16 protocol; + u32 prio; }; static inline void @@ -418,6 +419,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; + cls_common->prio = tp->prio; } struct tc_cls_u32_knode { @@ -515,7 +517,6 @@ enum tc_fl_command { struct tc_cls_flower_offload { struct tc_cls_common_offload common; enum tc_fl_command command; - u32 prio; unsigned long cookie; struct flow_dissector *dissector; struct fl_flow_key *mask; -- cgit From de4784ca030fed17d527dbb2bb4e21328b12de94 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Aug 2017 10:15:32 +0200 Subject: net: sched: get rid of struct tc_to_netdev Get rid of struct tc_to_netdev which is now just unnecessary container and rather pass per-type structures down to drivers directly. Along with that, consolidate the naming of per-type structure variables in cls_*. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6e2f7e38cf8e..1d238d54c484 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -771,9 +771,6 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); -/* These structures hold the attributes of qdisc and classifiers - * that are being passed to the netdevice through the setup_tc op. - */ enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32, @@ -782,18 +779,6 @@ enum tc_setup_type { TC_SETUP_CLSBPF, }; -struct tc_cls_u32_offload; - -struct tc_to_netdev { - union { - struct tc_cls_u32_offload *cls_u32; - struct tc_cls_flower_offload *cls_flower; - struct tc_cls_matchall_offload *cls_mall; - struct tc_cls_bpf_offload *cls_bpf; - struct tc_mqprio_qopt *mqprio; - }; -}; - /* These structures hold the attributes of xdp state that are being passed * to the netdevice through the xdp op. */ @@ -977,7 +962,7 @@ struct xfrmdev_ops { * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - * struct tc_to_netdev *tc); + * void *type_data); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue @@ -1226,7 +1211,7 @@ struct net_device_ops { int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, - struct tc_to_netdev *tc); + void *type_data); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); -- cgit From 67b465250e045446ad6fc59ab3e02beb40435878 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 25 Jul 2017 16:14:24 +0200 Subject: scsi: fc: start decoupling fc_block_scsi_eh from scsi_cmnd Scsi_cmnd is an unsuitable argument for eh_device_reset_handler(), eh_target_reset_handler(), and eh_host_reset_handler() which do not have the scope of one single SCSI command. These callbacks tend to use fc_block_scsi_eh() requiring scsi_cmnd. In order to start decoupling above eh callbacks from scsi_cmnd, introduce a new variant of the function called fc_block_rport() taking an fc_rport as argument. Refactor the old fc_block_scsi_eh() to simply delegate to fc_block_rport(). Signed-off-by: Steffen Maier Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_fc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index e308cd59e556..e8644eea9fe5 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -804,6 +804,7 @@ void fc_host_post_vendor_event(struct Scsi_Host *shost, u32 event_number, struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel, struct fc_vport_identifiers *); int fc_vport_terminate(struct fc_vport *vport); +int fc_block_rport(struct fc_rport *rport); int fc_block_scsi_eh(struct scsi_cmnd *cmnd); enum blk_eh_timer_return fc_eh_timed_out(struct scsi_cmnd *scmd); -- cgit From fb74c27735f0a34e76dbf1972084e984ad2ea145 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:16 -0700 Subject: net: ipv4: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip.h | 10 ++++++++++ include/net/udp.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 9e59dcf1787a..39db596eb89f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -78,6 +78,16 @@ struct ipcm_cookie { #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) +/* return enslaved device index if relevant */ +static inline int inet_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) + return IPCB(skb)->iif; +#endif + return 0; +} + struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..826c713d5a48 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif, + __be32 daddr, __be16 dport, int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit From 3fa6f616a7a4d0bdf4d877d530456d8a5c3b109b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:17 -0700 Subject: net: ipv4: add second dif to inet socket lookups Add a second device index, sdif, to inet socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after the cb move in tcp_v4_rcv the tcp_v4_sdif helper is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 31 +++++++++++++++++-------------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5026b1f08bb8..2dbbbff5e1e3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, - const int dif); + const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, int dif) + __be32 daddr, __be16 dport, int dif, int sdif) { return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, ntohs(dport), dif); + daddr, ntohs(dport), dif, sdif); } /* Socket demux engine toys. */ @@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net, (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_addrpair == (__cookie)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const int __name __deprecated __attribute__((unused)) -#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_daddr == (__saddr)) && \ ((__sk)->sk_rcv_saddr == (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* 64-bit arch */ @@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif); + const int dif, const int sdif); static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, @@ -297,7 +299,7 @@ static inline struct sock * const int dif) { return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, - ntohs(dport), dif); + ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, @@ -305,20 +307,20 @@ static inline struct sock *__inet_lookup(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif, + const int dif, const int sdif, bool *refcounted) { u16 hnum = ntohs(dport); struct sock *sk; sk = __inet_lookup_established(net, hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, @@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net, bool refcounted; sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - dport, dif, &refcounted); + dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, int doff, const __be16 sport, const __be16 dport, + const int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), + iph->daddr, dport, inet_iif(skb), sdif, refcounted); } diff --git a/include/net/tcp.h b/include/net/tcp.h index 5173fecde495..2b89f1ab8552 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) return false; } +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v4_sdif(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return TCP_SKB_CB(skb)->header.h4.iif; +#endif + return 0; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ -- cgit From 67359930e185c491b47cb958d5f1d6c1af4598a2 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:18 -0700 Subject: net: ipv4: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 57c33dd22ec4..99d26d0c4a19 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -26,7 +26,7 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, - __be32 laddr, int dif); + __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); -- cgit From 60d9b03141243589dacd3136f3fcb4e6976df954 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:19 -0700 Subject: net: ipv4: add second dif to multicast source filter Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/igmp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 97caf1821de8..f8231854b5d6 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -118,7 +118,8 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct group_filter __user *optval, int __user *optlen); -extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif); +extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, + int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); extern void ip_mc_destroy_dev(struct in_device *); extern void ip_mc_up(struct in_device *); -- cgit From 1801b570dd2ae50b90231f283e79a9a94fbe7875 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:20 -0700 Subject: net: ipv6: add second dif to udp socket lookups Add a second device index, sdif, to udp socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Early demux lookups are handled in the next patch as part of INET_MATCH changes. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 10 ++++++++++ include/net/udp.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 474d6bbc158c..ac2da4e11d5e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -158,6 +158,16 @@ static inline bool inet6_is_jumbogram(const struct sk_buff *skb) return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM); } +/* can not be used in TCP layer after tcp_v6_fill_cb */ +static inline int inet6_sdif(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return IP6CB(skb)->iif; +#endif + return 0; +} + /* can not be used in TCP layer after tcp_v6_fill_cb */ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { diff --git a/include/net/udp.h b/include/net/udp.h index 826c713d5a48..20dcdca4e85c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl, + int dif, int sdif, struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport); -- cgit From 4297a0ef085729af98adab9131d128c576ed3044 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:21 -0700 Subject: net: ipv6: add second dif to inet6 socket lookups Add a second device index, sdif, to inet6 socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. TCP moves the data in the cb. Prior to tcp_v4_rcv (e.g., early demux) the ingress index is obtained from IPCB using inet_sdif and after tcp_v4_rcv tcp_v4_sdif is used. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 22 +++++++++++++--------- include/net/tcp.h | 10 ++++++++++ 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index b87becacd9d3..6e91e38a31da 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -49,7 +49,8 @@ struct sock *__inet6_lookup_established(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const u16 hnum, const int dif); + const u16 hnum, const int dif, + const int sdif); struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, @@ -57,7 +58,8 @@ struct sock *inet6_lookup_listener(struct net *net, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif); + const unsigned short hnum, + const int dif, const int sdif); static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif, + const int dif, const int sdif, bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, - sport, daddr, hnum, dif); + sport, daddr, hnum, + dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, sdif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, - int iif, + int iif, int sdif, bool *refcounted) { struct sock *sk = skb_steal_sock(skb); @@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), - iif, refcounted); + iif, sdif, refcounted); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, @@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, int inet6_hash(struct sock *sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ -#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ +#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \ (((__sk)->sk_portpair == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \ ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \ (!(__sk)->sk_bound_dev_if || \ - ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_bound_dev_if == (__dif)) || \ + ((__sk)->sk_bound_dev_if == (__sdif))) && \ net_eq(sock_net(__sk), (__net))) #endif /* _INET6_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b89f1ab8552..999f3efe572b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -827,6 +827,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb) return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } + +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline int tcp_v6_sdif(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) + return TCP_SKB_CB(skb)->header.h6.iif; +#endif + return 0; +} #endif /* TCP_SKB_CB reference means this can not be used from early demux */ -- cgit From 5108ab4bf446fa9ad2c71f5fc1d839067b72636f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 7 Aug 2017 08:44:22 -0700 Subject: net: ipv6: add second dif to raw socket lookups Add a second device index, sdif, to raw socket lookups. sdif is the index for ingress devices enslaved to an l3mdev. It allows the lookups to consider the enslaved device as well as the L3 domain when searching for a socket. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/rawv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rawv6.h b/include/net/rawv6.h index cbe4e9de1894..4addc5c988e0 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -6,7 +6,7 @@ extern struct raw_hashinfo raw_v6_hashinfo; struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif); + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); -- cgit From cf5f5cea270655dd49370760576c64b228583b79 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 4 Aug 2017 16:00:09 -0700 Subject: bpf: add support for sys_enter_* and sys_exit_* tracepoints Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_* style tracepoints. The iovisor/bcc issue #748 (https://github.com/iovisor/bcc/issues/748) documents this issue. For example, if you try to attach a bpf program to tracepoints syscalls/sys_enter_newfstat, you will get the following error: # ./tools/trace.py t:syscalls:sys_enter_newfstat Ioctl(PERF_EVENT_IOC_SET_BPF): Invalid argument Failed to attach BPF to tracepoint The main reason is that syscalls/sys_enter_* and syscalls/sys_exit_* tracepoints are treated differently from other tracepoints and there is no bpf hook to it. This patch adds bpf support for these syscalls tracepoints by . permitting bpf attachment in ioctl PERF_EVENT_IOC_SET_BPF . calling bpf programs in perf_syscall_enter and perf_syscall_exit The legality of bpf program ctx access is also checked. Function trace_event_get_offsets returns correct max offset for each specific syscall tracepoint, which is compared against the maximum offset access in bpf program. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/syscalls.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3cb15ea48aee..c9170218e9e6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -172,8 +172,20 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __used \ __attribute__((section("__syscalls_metadata"))) \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; + +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) +{ + return tp_event->class == &event_class_syscall_enter || + tp_event->class == &event_class_syscall_exit; +} + #else #define SYSCALL_METADATA(sname, nb, ...) + +static inline int is_syscall_trace_event(struct trace_event_call *tp_event) +{ + return 0; +} #endif #define SYSCALL_DEFINE0(sname) \ -- cgit From 8113c095672f6504b23eba6edf4a57b5f7f744af Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 4 Aug 2017 21:31:43 -0700 Subject: net_sched: use void pointer for filter handle Now we use 'unsigned long fh' as a pointer in every place, it is safe to convert it to a void pointer now. This gets rid of many casts to pointer. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- include/net/sch_generic.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 572083af02ac..0f78e6560b2d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -11,7 +11,7 @@ struct tcf_walker { int stop; int skip; int count; - int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *); + int (*fn)(struct tcf_proto *, void *node, struct tcf_walker *); }; int register_tcf_proto_ops(struct tcf_proto_ops *ops); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..e79f5ad1c5f3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -213,16 +213,16 @@ struct tcf_proto_ops { int (*init)(struct tcf_proto*); void (*destroy)(struct tcf_proto*); - unsigned long (*get)(struct tcf_proto*, u32 handle); + void* (*get)(struct tcf_proto*, u32 handle); int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - unsigned long *, bool); - int (*delete)(struct tcf_proto*, unsigned long, bool*); + void **, bool); + int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ - int (*dump)(struct net*, struct tcf_proto*, unsigned long, + int (*dump)(struct net*, struct tcf_proto*, void *, struct sk_buff *skb, struct tcmsg*); struct module *owner; -- cgit From b04c80d3a7e228cfb832cdb1c9ce8151f174669c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:25 +0200 Subject: ipv6: sr: export SRH insertion functions This patch exports the seg6_do_srh_encap() and seg6_do_srh_inline() functions. It also removes the CONFIG_IPV6_SEG6_INLINE knob that enabled the compilation of seg6_do_srh_inline(). This function is now built-in. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 4e0357517d79..a32abb040e1d 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,5 +58,7 @@ extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit From d1df6fd8a1d22d37cffa0075ab8ad423ce656777 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Sat, 5 Aug 2017 12:38:26 +0200 Subject: ipv6: sr: define core operations for seg6local lightweight tunnel This patch implements a new type of lightweight tunnel named seg6local. A seg6local lwt is defined by a type of action and a set of parameters. The action represents the operation to perform on the packets matching the lwt's route, and is not necessarily an encapsulation. The set of parameters are arguments for the processing function. Each action is defined in a struct seg6_action_desc within seg6_action_table[]. This structure contains the action, mandatory attributes, the processing function, and a static headroom size required by the action. The mandatory attributes are encoded as a bitmask field. The static headroom is set to a non-zero value when the processing function always add a constant number of bytes to the skb (e.g. the header size for encapsulations). To facilitate rtnetlink-related operations such as parsing, fill_encap, and cmp_encap, each type of action parameter is associated to three function pointers, in seg6_action_params[]. All actions defined in seg6_local.h are detailed in [1]. [1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/seg6_local.h | 6 ++++ include/net/seg6.h | 2 ++ include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/seg6_local.h | 68 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 include/linux/seg6_local.h create mode 100644 include/uapi/linux/seg6_local.h (limited to 'include') diff --git a/include/linux/seg6_local.h b/include/linux/seg6_local.h new file mode 100644 index 000000000000..ee63e76fe0c7 --- /dev/null +++ b/include/linux/seg6_local.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_LOCAL_H +#define _LINUX_SEG6_LOCAL_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index a32abb040e1d..5379f550f521 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -56,6 +56,8 @@ extern int seg6_init(void); extern void seg6_exit(void); extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); +extern int seg6_local_init(void); +extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index 92724cba1eba..7fdd19ca7511 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -11,6 +11,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP6, LWTUNNEL_ENCAP_SEG6, LWTUNNEL_ENCAP_BPF, + LWTUNNEL_ENCAP_SEG6_LOCAL, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h new file mode 100644 index 000000000000..ef2d8c3e76c1 --- /dev/null +++ b/include/uapi/linux/seg6_local.h @@ -0,0 +1,68 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_LOCAL_H +#define _UAPI_LINUX_SEG6_LOCAL_H + +#include + +enum { + SEG6_LOCAL_UNSPEC, + SEG6_LOCAL_ACTION, + SEG6_LOCAL_SRH, + SEG6_LOCAL_TABLE, + SEG6_LOCAL_NH4, + SEG6_LOCAL_NH6, + SEG6_LOCAL_IIF, + SEG6_LOCAL_OIF, + __SEG6_LOCAL_MAX, +}; +#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1) + +enum { + SEG6_LOCAL_ACTION_UNSPEC = 0, + /* node segment */ + SEG6_LOCAL_ACTION_END = 1, + /* adjacency segment (IPv6 cross-connect) */ + SEG6_LOCAL_ACTION_END_X = 2, + /* lookup of next seg NH in table */ + SEG6_LOCAL_ACTION_END_T = 3, + /* decap and L2 cross-connect */ + SEG6_LOCAL_ACTION_END_DX2 = 4, + /* decap and IPv6 cross-connect */ + SEG6_LOCAL_ACTION_END_DX6 = 5, + /* decap and IPv4 cross-connect */ + SEG6_LOCAL_ACTION_END_DX4 = 6, + /* decap and lookup of DA in v6 table */ + SEG6_LOCAL_ACTION_END_DT6 = 7, + /* decap and lookup of DA in v4 table */ + SEG6_LOCAL_ACTION_END_DT4 = 8, + /* binding segment with insertion */ + SEG6_LOCAL_ACTION_END_B6 = 9, + /* binding segment with encapsulation */ + SEG6_LOCAL_ACTION_END_B6_ENCAP = 10, + /* binding segment with MPLS encap */ + SEG6_LOCAL_ACTION_END_BM = 11, + /* lookup last seg in table */ + SEG6_LOCAL_ACTION_END_S = 12, + /* forward to SR-unaware VNF with static proxy */ + SEG6_LOCAL_ACTION_END_AS = 13, + /* forward to SR-unaware VNF with masquerading */ + SEG6_LOCAL_ACTION_END_AM = 14, + + __SEG6_LOCAL_ACTION_MAX, +}; + +#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1) + +#endif -- cgit From 6c2c1dcb185f1e44e1c895781dbaba40195234f9 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:39 +0300 Subject: net: dsa: Change DSA slave FDB API to be switchdev independent In order to support FDB add/del to be on a notifier chain the slave API need to be changed to be switchdev independent. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 0b1a0622b33c..ba11005dac8c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -386,13 +386,11 @@ struct dsa_switch_ops { * Forwarding database */ int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); void (*port_fdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); + const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb); + const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb); -- cgit From 1b6dd556c3045ca5fa31cc1e98a4a43afa680e1e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:40 +0300 Subject: net: dsa: Remove prepare phase for FDB The prepare phase for FDB add is unneeded because most of DSA devices can have failures during bus transactions (SPI, I2C, etc.), thus, the prepare phase cannot guarantee success of the commit stage. The support for learning FDB through notification chain, which will be introduced in the following patches, will provide the ability to notify back the bridge about successful offload. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index ba11005dac8c..446fc438cb80 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -385,9 +385,7 @@ struct dsa_switch_ops { /* * Forwarding database */ - int (*port_fdb_prepare)(struct dsa_switch *ds, int port, - const unsigned char *addr, u16 vid); - void (*port_fdb_add)(struct dsa_switch *ds, int port, + int (*port_fdb_add)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); -- cgit From c069fcd82c571953b8aaf68769afe9ccb1aa7a9f Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:46 +0300 Subject: net: dsa: Remove support for bypass bridge port attributes/vlan set The bridge port attributes/vlan for DSA devices should be set only from bridge code. Furthermore, The vlans are synced totally with the bridge so there is no need for special dump support. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 446fc438cb80..d7b9bdd0e768 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -378,10 +378,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); - int (*port_vlan_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb); - /* * Forwarding database */ -- cgit From dc0cbff3ff9fe331160c2be2b3f47564e247137d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:48 +0300 Subject: net: dsa: Remove redundant MDB dump support Currently the MDB HW database is synced with the bridge's one, thus, There is no need to support special dump functionality. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index d7b9bdd0e768..4ef185997b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -400,10 +400,6 @@ struct dsa_switch_ops { struct switchdev_trans *trans); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); - int (*port_mdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb); - /* * RXNFC */ -- cgit From 2bedde1abbef5eec211308f0293dd7681b0513ec Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:49 +0300 Subject: net: dsa: Move FDB dump implementation inside DSA >From all switchdev devices only DSA requires special FDB dump. This is due to lack of ability for syncing the hardware learned FDBs with the bridge. Due to this it is removed from switchdev and moved inside DSA. Signed-off-by: Arkadi Sharshevsky Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++-- include/net/switchdev.h | 12 ------------ 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 4ef185997b6f..a4f66dbb4b7c 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -272,6 +272,8 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) return ds->rtable[dst->cpu_dp->ds->index]; } +typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, + bool is_static, void *data); struct dsa_switch_ops { /* * Legacy probing. @@ -386,8 +388,7 @@ struct dsa_switch_ops { int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int (*port_fdb_dump)(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb); + dsa_fdb_dump_cb_t *cb, void *data); /* * Multicast database diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8ae9e3b6392e..d2637a685ca6 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -208,9 +208,6 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); -int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -309,15 +306,6 @@ static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], return -EOPNOTSUPP; } -static inline int switchdev_port_fdb_dump(struct sk_buff *skb, - struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, - int *idx) -{ - return *idx; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit From 29ab586c3d83f81c435e269cace9a1619afb5bbd Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Sun, 6 Aug 2017 16:15:51 +0300 Subject: net: switchdev: Remove bridge bypass support from switchdev Currently the bridge port flags, vlans, FDBs and MDBs can be offloaded through the bridge code, making the switchdev's SELF bridge bypass implementation to be redundant. This implies several changes: - No need for dump infra in switchdev, DSA's special case is handled privately. - Remove obj_dump from switchdev_ops. - FDBs are removed from obj_add/del routines, due to the fact that they are offloaded through the bridge notification chain. - The switchdev_port_bridge_xx() and switchdev_port_fdb_xx() functions can be removed. Signed-off-by: Arkadi Sharshevsky Reviewed-by: Vivien Didelot Acked-by: Jiri Pirko Reviewed-by: Ivan Vecera Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/switchdev.h | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d2637a685ca6..d767b7991887 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -74,7 +74,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_PORT_FDB */ -struct switchdev_obj_port_fdb { - struct switchdev_obj obj; - unsigned char addr[ETH_ALEN]; - u16 vid; - u16 ndm_state; -}; - -#define SWITCHDEV_OBJ_PORT_FDB(obj) \ - container_of(obj, struct switchdev_obj_port_fdb, obj) - /* SWITCHDEV_OBJ_ID_PORT_MDB */ struct switchdev_obj_port_mdb { struct switchdev_obj obj; @@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); * @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*). * * @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*). - * - * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*). */ struct switchdev_ops { int (*switchdev_port_attr_get)(struct net_device *dev, @@ -149,9 +135,6 @@ struct switchdev_ops { struct switchdev_trans *trans); int (*switchdev_port_obj_del)(struct net_device *dev, const struct switchdev_obj *obj); - int (*switchdev_port_obj_dump)(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); }; enum switchdev_notifier_type { @@ -189,25 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj); int switchdev_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj); -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb); int register_switchdev_notifier(struct notifier_block *nb); int unregister_switchdev_notifier(struct notifier_block *nb); int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info); -int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags); -int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags); -int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid, u16 nlm_flags); -int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -246,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_port_obj_dump(struct net_device *dev, - const struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - return -EOPNOTSUPP; -} - static inline int register_switchdev_notifier(struct notifier_block *nb) { return 0; @@ -270,42 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long val, return NOTIFY_DONE; } -static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, - u32 seq, struct net_device *dev, - u32 filter_mask, int nlflags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, - u16 flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, - u16 vid, u16 nlm_flags) -{ - return -EOPNOTSUPP; -} - -static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return -EOPNOTSUPP; -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit From 199b5763d329b43c88f6ad539db8a6c6b42f8edb Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Tue, 8 Aug 2017 12:05:32 +0800 Subject: KVM: add spinlock optimization framework If a vcpu exits due to request a user mode spinlock, then the spinlock-holder may be preempted in user mode or kernel mode. (Note that not all architectures trap spin loops in user mode, only AMD x86 and ARM/ARM64 currently do). But if a vcpu exits in kernel mode, then the holder must be preempted in kernel mode, so we should choose a vcpu in kernel mode as a more likely candidate for the lock holder. This introduces kvm_arch_vcpu_in_kernel() to decide whether the vcpu is in kernel-mode when it's preempted. kvm_vcpu_on_spin's new argument says the same of the spinning VCPU. Signed-off-by: Longpeng(Mike) Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 28112d7917c1..6882538eda32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); @@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); #ifndef __KVM_HAVE_ARCH_VM_ALLOC -- cgit From 4c7089ee66026f38275d43e26d9da6e2945af6f9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 21 Jul 2017 06:48:33 -0400 Subject: media: media-device: set driver_version directly Don't use driver_version from struct media_device, just return LINUX_VERSION_CODE as the other media subsystems do. The driver_version field in struct media_device will be removed in the following patches. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/media-device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/media/media-device.h b/include/media/media-device.h index 6896266031b9..7ae200d89a9f 100644 --- a/include/media/media-device.h +++ b/include/media/media-device.h @@ -249,11 +249,6 @@ void media_device_cleanup(struct media_device *mdev); * driver-specific format. When possible the revision should be formatted * with the KERNEL_VERSION() macro. * - * - &media_entity.driver_version is formatted with the KERNEL_VERSION() - * macro. The version minor must be incremented when new features are added - * to the userspace API without breaking binary compatibility. The version - * major must be incremented when binary compatibility is broken. - * * .. note:: * * #) Upon successful registration a character device named media[0-9]+ is created. The device major and minor numbers are dynamic. The model name is exported as a sysfs attribute. -- cgit From 2bd8682375f3a79e463d8840794d2872b02d755b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 21 Jul 2017 04:49:44 -0400 Subject: media: media-device: remove driver_version Since the driver_version field in struct media_device is no longer used, just remove it. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/media/media-device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/media/media-device.h b/include/media/media-device.h index 7ae200d89a9f..bcc6ec434f1f 100644 --- a/include/media/media-device.h +++ b/include/media/media-device.h @@ -68,7 +68,6 @@ struct media_device_ops { * @serial: Device serial number (optional) * @bus_info: Unique and stable device location identifier * @hw_revision: Hardware device revision - * @driver_version: Device driver version * @topology_version: Monotonic counter for storing the version of the graph * topology. Should be incremented each time the topology changes. * @id: Unique ID used on the last registered graph object @@ -134,7 +133,6 @@ struct media_device { char serial[40]; char bus_info[32]; u32 hw_revision; - u32 driver_version; u64 topology_version; -- cgit From 6c2c188f35c61c8eee71ec6d07524ce122c06539 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 28 Jul 2017 03:25:06 -0400 Subject: media: drop use of MEDIA_API_VERSION Set media_version to LINUX_VERSION_CODE, just as we did for driver_version. Nobody ever rememebers to update the version number, but LINUX_VERSION_CODE will always be updated. Move the MEDIA_API_VERSION define to the ifndef __KERNEL__ section of the media.h header. That way kernelspace can't accidentally start to use it again. Signed-off-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/media.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h index fac96c64fe51..4865f1e71339 100644 --- a/include/uapi/linux/media.h +++ b/include/uapi/linux/media.h @@ -30,8 +30,6 @@ #include #include -#define MEDIA_API_VERSION KERNEL_VERSION(0, 1, 0) - struct media_device_info { char driver[16]; char model[32]; @@ -187,6 +185,9 @@ struct media_device_info { #define MEDIA_ENT_T_V4L2_SUBDEV_LENS MEDIA_ENT_F_LENS #define MEDIA_ENT_T_V4L2_SUBDEV_DECODER MEDIA_ENT_F_ATV_DECODER #define MEDIA_ENT_T_V4L2_SUBDEV_TUNER MEDIA_ENT_F_TUNER + +/* Obsolete symbol for media_version, no longer used in the kernel */ +#define MEDIA_API_VERSION KERNEL_VERSION(0, 1, 0) #endif /* Entity flags */ -- cgit From 0ec9eb90feec4933637fbde9d5bfbc3b62aea218 Mon Sep 17 00:00:00 2001 From: Chi-Hsien Lin Date: Thu, 3 Aug 2017 17:37:58 +0800 Subject: brcmfmac: Add support for CYW4373 SDIO/USB chipset Add support for CYW4373 SDIO/USB chipset. CYW4373 is a 1x1 dual-band 11ac chipset with 20/40/80Mhz channel support. It's a WiFi/BT combo device. Signed-off-by: Chi-Hsien Lin Reviewed-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index b733eb404ffc..abacd5484bc0 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -39,6 +39,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_DEVICE_ID_BROADCOM_4356 0x4356 +#define SDIO_DEVICE_ID_CYPRESS_4373 0x4373 #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 -- cgit From 41c8f70f5a3db7e06179186b6525fd9ee1d7d314 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Aug 2017 14:30:11 -0400 Subject: xprtrdma: Harden backchannel call decoding Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 261b48a2701d..86b59e3525a5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -239,6 +239,19 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +/** + * xdr_stream_remaining - Return the number of bytes remaining in the stream + * @xdr: pointer to struct xdr_stream + * + * Return value: + * Number of bytes remaining in @xdr before xdr->end + */ +static inline size_t +xdr_stream_remaining(const struct xdr_stream *xdr) +{ + return xdr->nwords << 2; +} + ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); /** -- cgit From d6344d4b5628052ccda104466e5e05b9c43d7b61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Aug 2017 14:57:39 +0200 Subject: cpufreq: Simplify cpufreq_can_do_remote_dvfs() The if () in cpufreq_can_do_remote_dvfs() is superfluous, so drop it and simply return the value of the expression under it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1981a4a167ce..d69464139f67 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -578,11 +578,8 @@ static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) * - dvfs_possible_from_any_cpu flag is set * - the local and remote CPUs share cpufreq policy */ - if (policy->dvfs_possible_from_any_cpu || - cpumask_test_cpu(smp_processor_id(), policy->cpus)) - return true; - - return false; + return policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus); } /********************************************************************* -- cgit From ec52e462564b9c5bfdf1f79638c537c7103e1d2b Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Tue, 1 Aug 2017 18:21:22 +0200 Subject: clk: fractional-divider: allow overriding of approximation Fractional dividers may have special requirements concerning numerator and denominator selection that differ from just getting the best approximation. For example on Rockchip socs the denominator must be at least 20 times larger than the numerator to generate precise clock frequencies. Therefore add the ability to provide custom approximation functions. Signed-off-by: Elaine Zhang Acked-by: Stephen Boyd Signed-off-by: Heiko Stuebner --- include/linux/clk-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c59c62571e4f..1fc113fbf955 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -565,6 +565,9 @@ struct clk_fractional_divider { u8 nwidth; u32 nmask; u8 flags; + void (*approximation)(struct clk_hw *hw, + unsigned long rate, unsigned long *parent_rate, + unsigned long *m, unsigned long *n); spinlock_t *lock; }; -- cgit From d541e45500bd269060c26387902e1bec9783c07c Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:43 -0400 Subject: IB/core: Convert ah_attr from OPA to IB when copying to user OPA address handle atttibutes that have 32 bit LIDs would have to be converted to IB address handle attribute with the LID field programmed in the GID before copying to user space. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/ib_marshall.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index 68cef3bd50fb..8ebf84ae9ed1 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -38,10 +38,12 @@ #include #include -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, +void ib_copy_qp_attr_to_user(struct ib_device *device, + struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, +void ib_copy_ah_attr_to_user(struct ib_device *device, + struct ib_uverbs_ah_attr *dst, struct rdma_ah_attr *src); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, -- cgit From 582faf3150f57b8364ac9d2aa731d7368ada7a4b Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:47 -0400 Subject: IB/core: Change port_attr.lid size from 16 to 32 bits lid field in struct ib_port_attr is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- include/rdma/opa_addr.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..4fa94e69b1fc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,8 +549,8 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 lid; u16 sm_lid; + u32 lid; u8 lmc; u8 max_vl_num; u8 sm_sl; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index eace28f1555d..46d0567fffea 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -50,7 +50,8 @@ #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) - +#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ + ? 0 : x) /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that -- cgit From db58540b021a17e0ede64f761b740556d77f1679 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:48 -0400 Subject: IB/core: Change port_attr.sm_lid from 16 to 32 bits sm_lid field in struct ib_port_attr is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4fa94e69b1fc..620535908118 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,7 +549,7 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 sm_lid; + u32 sm_lid; u32 lid; u8 lmc; u8 max_vl_num; -- cgit From 7db20ecd1d9700e2c240dee505162eb56ab55b5b Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Thu, 8 Jun 2017 13:37:49 -0400 Subject: IB/core: Change wc.slid from 16 to 32 bits slid field in struct ib_wc is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 620535908118..7eaf7d2ab424 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -948,7 +948,7 @@ struct ib_wc { u32 src_qp; int wc_flags; u16 pkey_index; - u16 slid; + u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ @@ -3706,4 +3706,16 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, else return RDMA_AH_ATTR_TYPE_IB; } + +/* Return slid in 16bit CPU encoding */ +static inline u16 ib_slid_cpu16(u32 slid) +{ + return (u16)slid; +} + +/* Return slid in 16bit BE encoding */ +static inline u16 ib_slid_be16(u32 slid) +{ + return cpu_to_be16((u16)slid); +} #endif /* IB_VERBS_H */ -- cgit From e92aa00a518971fca6b79aa87a1a9c5e5aa51f3b Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Thu, 8 Jun 2017 13:38:02 -0400 Subject: IB/CM: Add OPA Path record support to CM Add OPA path record support to the Connection Manager. Signed-off-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 46d0567fffea..9b5e642cf550 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -77,4 +77,22 @@ static inline u32 opa_get_lid_from_gid(union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } + +/** + * opa_is_extended_lid: Returns true if dlid or slid are + * extended. + * + * @dlid: The DLID + * @slid: The SLID + */ +static inline bool opa_is_extended_lid(u32 dlid, u32 slid) +{ + if ((be32_to_cpu(dlid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (be32_to_cpu(slid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE))) + return true; + else + return false; +} #endif /* OPA_ADDR_H */ -- cgit From 78249c4215840edb95447ec6867b69a7ac1d7a0d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:38 +0300 Subject: mlx5: convert to generic pci_alloc_irq_vectors Now that we have a generic code to allocate an array of irq vectors and even correctly spread their affinity, correctly handle cpu hotplug events and more, were much better off using it. Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..5bac7f53b4f9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -597,7 +597,6 @@ struct mlx5_port_module_event_stats { struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; - struct msix_entry *msix_arr; struct mlx5_irq_info *irq_info; /* pages stuff */ -- cgit From a435393acafbf0ecff4deb3e3cb554b34f0d0664 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:40 +0300 Subject: mlx5: move affinity hints assignments to generic code generic api takes care of spreading affinity similar to what mlx5 open coded (and even handles better asymmetric configurations). Ask the generic API to spread affinity for us, and feed him pre_vectors that do not participate in affinity settings (which is an improvement to what we had before). The affinity assignments should match what mlx5 tried to do earlier but now we do not set affinity to async, cmd and pages dedicated vectors. Also, remove mlx5e_get_cpu and introduce mlx5e_get_node (used for allocation purposes) and mlx5_get_vector_affinity (for indirection table construction) as they provide the needed information. Luckily, we have generic helpers to get cpumask and node given a irq vector. mlx5_get_vector_affinity will be used by mlx5_ib in a subsequent patch. Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5bac7f53b4f9..579731842c94 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -534,7 +534,6 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { - cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; @@ -1184,4 +1183,10 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline const struct cpumask * +mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +{ + return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); +} + #endif /* MLX5_DRIVER_H */ -- cgit From c66cd353bbe6869a059869a7a1518ec619afdc9d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:41 +0300 Subject: RDMA/core: expose affinity mappings per completion vector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow ULPs to intelligently locate threads based on completion vector cpu affinity mappings. In case the driver does not expose a get_vector_affinity callout, return NULL so the caller can maintain a fallback logic. Reviewed-by: Christoph Hellwig Reviewed-by: Håkon Bugge Acked-by: Doug Ledford Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..73ed2e4e802f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2296,6 +2296,8 @@ struct ib_device { */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, + int comp_vector); }; struct ib_client { @@ -3706,4 +3708,26 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, else return RDMA_AH_ATTR_TYPE_IB; } + +/** + * ib_get_vector_affinity - Get the affinity mappings of a given completion + * vector + * @device: the rdma device + * @comp_vector: index of completion vector + * + * Returns NULL on failure, otherwise a corresponding cpu map of the + * completion vector (returns all-cpus map if the device driver doesn't + * implement get_vector_affinity). + */ +static inline const struct cpumask * +ib_get_vector_affinity(struct ib_device *device, int comp_vector) +{ + if (comp_vector < 0 || comp_vector >= device->num_comp_vectors || + !device->get_vector_affinity) + return NULL; + + return device->get_vector_affinity(device, comp_vector); + +} + #endif /* IB_VERBS_H */ -- cgit From 24c5dc6610e8a3764fcb885cc3284c12ff1513de Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:43 +0300 Subject: block: Add rdma affinity based queue mapping helper Like pci and virtio, we add a rdma helper for affinity spreading. This achieves optimal mq affinity assignments according to the underlying rdma device affinity maps. Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/linux/blk-mq-rdma.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/blk-mq-rdma.h (limited to 'include') diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h new file mode 100644 index 000000000000..b4ade198007d --- /dev/null +++ b/include/linux/blk-mq-rdma.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_RDMA_H +#define _LINUX_BLK_MQ_RDMA_H + +struct blk_mq_tag_set; +struct ib_device; + +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec); + +#endif /* _LINUX_BLK_MQ_RDMA_H */ -- cgit From f1174f77b50c94eecaa658fdc56fa69b421de4b8 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 7 Aug 2017 15:26:19 +0100 Subject: bpf/verifier: rework value tracking Unifies adjusted and unadjusted register value types (e.g. FRAME_POINTER is now just a PTR_TO_STACK with zero offset). Tracks value alignment by means of tracking known & unknown bits. This also replaces the 'reg->imm' (leading zero bits) calculations for (what were) UNKNOWN_VALUEs. If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES, treat the pointer as an unknown scalar and try again, because we might be able to conclude something about the result (e.g. pointer & 0x40 is either 0 or 0x40). Verifier hooks in the netronome/nfp driver were changed to match the new data structures. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/bpf.h | 34 +++++++------------ include/linux/bpf_verifier.h | 34 +++++++++++-------- include/linux/tnum.h | 79 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 35 deletions(-) create mode 100644 include/linux/tnum.h (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6353c7474dba..39229c455cba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,35 +117,25 @@ enum bpf_access_type { }; /* types of values stored in eBPF registers */ +/* Pointer types represent: + * pointer + * pointer + imm + * pointer + (u16) var + * pointer + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) means that 'imm' was added + */ enum bpf_reg_type { NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + SCALAR_VALUE, /* reg doesn't contain a valid pointer */ PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, + PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ - - /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map - * elem value. We only allow this if we can statically verify that - * access from this register are going to fall within the size of the - * map element. - */ - PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8e5d31f6faef..85936fa92d12 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -9,6 +9,7 @@ #include /* for enum bpf_reg_type */ #include /* for MAX_BPF_STACK */ +#include /* Just some arbitrary values so we can safely do math without overflowing and * are obviously wrong for any sort of memory access. @@ -19,30 +20,37 @@ struct bpf_reg_state { enum bpf_reg_type type; union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u16 off; - u16 range; - }; + /* valid when type == PTR_TO_PACKET */ + u16 range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; }; + /* Fixed part of pointer offset, pointer types only */ + s32 off; + /* For PTR_TO_PACKET, used to find other pointers with the same variable + * offset, so they can share range knowledge. + * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we + * came from, when one is tested for != NULL. + */ u32 id; + /* These three fields must be last. See states_equal() */ + /* For scalar types (SCALAR_VALUE), this represents our knowledge of + * the actual value. + * For pointer types, this represents the variable part of the offset + * from the pointed-to object, and is shared with all bpf_reg_states + * with the same id as us. + */ + struct tnum var_off; /* Used to determine if any memory access using this register will - * result in a bad access. These two fields must be last. - * See states_equal() + * result in a bad access. + * These refer to the same value as var_off, not necessarily the actual + * contents of the register. */ s64 min_value; u64 max_value; - u32 min_align; - u32 aux_off; - u32 aux_off_align; bool value_from_signed; }; diff --git a/include/linux/tnum.h b/include/linux/tnum.h new file mode 100644 index 000000000000..a0b07bf1842b --- /dev/null +++ b/include/linux/tnum.h @@ -0,0 +1,79 @@ +/* tnum: tracked (or tristate) numbers + * + * A tnum tracks knowledge about the bits of a value. Each bit can be either + * known (0 or 1), or unknown (x). Arithmetic operations on tnums will + * propagate the unknown bits such that the tnum result represents all the + * possible results for possible values of the operands. + */ +#include + +struct tnum { + u64 value; + u64 mask; +}; + +/* Constructors */ +/* Represent a known constant as a tnum. */ +struct tnum tnum_const(u64 value); +/* A completely unknown value */ +extern const struct tnum tnum_unknown; + +/* Arithmetic and logical ops */ +/* Shift a tnum left (by a fixed shift) */ +struct tnum tnum_lshift(struct tnum a, u8 shift); +/* Shift a tnum right (by a fixed shift) */ +struct tnum tnum_rshift(struct tnum a, u8 shift); +/* Add two tnums, return @a + @b */ +struct tnum tnum_add(struct tnum a, struct tnum b); +/* Subtract two tnums, return @a - @b */ +struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Bitwise-AND, return @a & @b */ +struct tnum tnum_and(struct tnum a, struct tnum b); +/* Bitwise-OR, return @a | @b */ +struct tnum tnum_or(struct tnum a, struct tnum b); +/* Bitwise-XOR, return @a ^ @b */ +struct tnum tnum_xor(struct tnum a, struct tnum b); +/* Multiply two tnums, return @a * @b */ +struct tnum tnum_mul(struct tnum a, struct tnum b); + +/* Return a tnum representing numbers satisfying both @a and @b */ +struct tnum tnum_intersect(struct tnum a, struct tnum b); + +/* Return @a with all but the lowest @size bytes cleared */ +struct tnum tnum_cast(struct tnum a, u8 size); + +/* Returns true if @a is a known constant */ +static inline bool tnum_is_const(struct tnum a) +{ + return !a.mask; +} + +/* Returns true if @a == tnum_const(@b) */ +static inline bool tnum_equals_const(struct tnum a, u64 b) +{ + return tnum_is_const(a) && a.value == b; +} + +/* Returns true if @a is completely unknown */ +static inline bool tnum_is_unknown(struct tnum a) +{ + return !~a.mask; +} + +/* Returns true if @a is known to be a multiple of @size. + * @size must be a power of two. + */ +bool tnum_is_aligned(struct tnum a, u64 size); + +/* Returns true if @b represents a subset of @a. */ +bool tnum_in(struct tnum a, struct tnum b); + +/* Formatting functions. These have snprintf-like semantics: they will write + * up to @size bytes (including the terminating NUL byte), and return the number + * of bytes (excluding the terminating NUL) which would have been written had + * sufficient space been available. (Thus tnum_sbin always returns 64.) + */ +/* Format a tnum as a pair of hex numbers (value; mask) */ +int tnum_strn(char *str, size_t size, struct tnum a); +/* Format a tnum as tristate binary expansion */ +int tnum_sbin(char *str, size_t size, struct tnum a); -- cgit From b03c9f9fdc37dab81ea04d5dacdc5995d4c224c2 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 7 Aug 2017 15:26:36 +0100 Subject: bpf/verifier: track signed and unsigned min/max values Allows us to, sometimes, combine information from a signed check of one bound and an unsigned check of the other. We now track the full range of possible values, rather than restricting ourselves to [0, 1<<30) and considering anything beyond that as unknown. While this is probably not necessary, it makes the code more straightforward and symmetrical between signed and unsigned bounds. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 23 ++++++++++++++--------- include/linux/tnum.h | 2 ++ 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 85936fa92d12..c61c3033522e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -11,11 +11,15 @@ #include /* for MAX_BPF_STACK */ #include - /* Just some arbitrary values so we can safely do math without overflowing and - * are obviously wrong for any sort of memory access. - */ -#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -1 +/* Maximum variable offset umax_value permitted when resolving memory accesses. + * In practice this is far bigger than any realistic pointer offset; this limit + * ensures that umax_value + (int)off + (int)size cannot overflow a u64. + */ +#define BPF_MAX_VAR_OFF (1ULL << 31) +/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures + * that converting umax_value to int cannot overflow. + */ +#define BPF_MAX_VAR_SIZ INT_MAX struct bpf_reg_state { enum bpf_reg_type type; @@ -36,7 +40,7 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* These three fields must be last. See states_equal() */ + /* These five fields must be last. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -49,9 +53,10 @@ struct bpf_reg_state { * These refer to the same value as var_off, not necessarily the actual * contents of the register. */ - s64 min_value; - u64 max_value; - bool value_from_signed; + s64 smin_value; /* minimum possible (s64)value */ + s64 smax_value; /* maximum possible (s64)value */ + u64 umin_value; /* minimum possible (u64)value */ + u64 umax_value; /* maximum possible (u64)value */ }; enum bpf_stack_slot_type { diff --git a/include/linux/tnum.h b/include/linux/tnum.h index a0b07bf1842b..0d2d3da46139 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -17,6 +17,8 @@ struct tnum { struct tnum tnum_const(u64 value); /* A completely unknown value */ extern const struct tnum tnum_unknown; +/* A value that's unknown except that @min <= value <= @max */ +struct tnum tnum_range(u64 min, u64 max); /* Arithmetic and logical ops */ /* Shift a tnum left (by a fixed shift) */ -- cgit From feca7d8c135bc1527b244fe817b8b6498066ccec Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Tue, 8 Aug 2017 20:23:49 +0200 Subject: net: ipv6: avoid overhead when no custom FIB rules are installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user hasn't installed any custom rules, don't go through the whole FIB rules layer. This is pretty similar to f4530fa574df (ipv4: Avoid overhead when no custom FIB rules are installed). Using a micro-benchmark module [1], timing ip6_route_output() with get_cycles(), with 40,000 routes in the main routing table, before this patch: min=606 max=12911 count=627 average=1959 95th=4903 90th=3747 50th=1602 mad=821 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 600 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 199 880 │▒▒▒░░░░░░░░░░░░░░░░ 43 1160 │▒▒▒░░░░░░░░░░░░░░░░░░░░ 48 1440 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░ 43 1720 │▒▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░ 59 2000 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 50 2280 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2560 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 31 2840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 28 3120 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3400 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 17 3680 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3960 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 11 4240 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4520 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 6 4800 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 After: min=544 max=11687 count=627 average=1776 95th=4546 90th=3585 50th=1227 mad=565 table=254 avgdepth=21.8 maxdepth=39 value │ ┊ count 540 │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ 201 800 │▒▒▒▒▒░░░░░░░░░░░░░░░░ 63 1060 │▒▒▒▒▒░░░░░░░░░░░░░░░░░░░░░ 68 1320 │▒▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░ 39 1580 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 1840 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 32 2100 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 34 2360 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 33 2620 │▒▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26 2880 │▒░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 22 3140 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3400 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 3660 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 9 3920 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4180 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 4440 │░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 8 At the frequency of the host during the bench (~ 3.7 GHz), this is about a 100 ns difference on the median value. A next step would be to collapse local and main tables, as in 0ddcf43d5d4a (ipv4: FIB Local/MAIN table collapse). [1]: https://github.com/vincentbernat/network-lab/blob/master/lab-routes-ipv6/kbench_mod.c Signed-off-by: Vincent Bernat Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index abdf3b40303b..0e50bf3ed097 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -65,6 +65,7 @@ struct netns_ipv6 { unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES + bool fib6_has_custom_rules; struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; -- cgit From 04c2cf34362f133be09878bd752f8b014318b59a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:25 +0300 Subject: mac80211: add api to start ba session timer expired flow Some drivers handle rx buffer reordering internally (and by extension handle also the rx ba session timer internally), but do not ofload the addba/delba negotiation. Add an api for these drivers to properly tear-down the ba session, including sending a delba. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b2b5419467cc..f8149ca192b4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5499,6 +5499,21 @@ static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); } +/** + * ieee80211_rx_ba_timer_expired - stop a Rx BA session due to timeout + * + * Some device drivers do not offload AddBa/DelBa negotiation, but handle rx + * buffer reording internally, and therefore also handle the session timer. + * + * Trigger the timeout flow, which sends a DelBa. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid); + /* Rate control API */ /** -- cgit From 2d97591ef43d0587be22ad1b0d758d6df4999a0b Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 2 Aug 2017 07:56:19 +0200 Subject: crypto: af_alg - consolidation of duplicate code Consolidate following data structures: skcipher_async_req, aead_async_req -> af_alg_async_req skcipher_rsgl, aead_rsql -> af_alg_rsgl skcipher_tsgl, aead_tsql -> af_alg_tsgl skcipher_ctx, aead_ctx -> af_alg_ctx Consolidate following functions: skcipher_sndbuf, aead_sndbuf -> af_alg_sndbuf skcipher_writable, aead_writable -> af_alg_writable skcipher_rcvbuf, aead_rcvbuf -> af_alg_rcvbuf skcipher_readable, aead_readable -> af_alg_readable aead_alloc_tsgl, skcipher_alloc_tsgl -> af_alg_alloc_tsgl aead_count_tsgl, skcipher_count_tsgl -> af_alg_count_tsgl aead_pull_tsgl, skcipher_pull_tsgl -> af_alg_pull_tsgl aead_free_areq_sgls, skcipher_free_areq_sgls -> af_alg_free_areq_sgls aead_wait_for_wmem, skcipher_wait_for_wmem -> af_alg_wait_for_wmem aead_wmem_wakeup, skcipher_wmem_wakeup -> af_alg_wmem_wakeup aead_wait_for_data, skcipher_wait_for_data -> af_alg_wait_for_data aead_data_wakeup, skcipher_data_wakeup -> af_alg_data_wakeup aead_sendmsg, skcipher_sendmsg -> af_alg_sendmsg aead_sendpage, skcipher_sendpage -> af_alg_sendpage aead_async_cb, skcipher_async_cb -> af_alg_async_cb aead_poll, skcipher_poll -> af_alg_poll Split out the following common code from recvmsg: af_alg_alloc_areq: allocation of the request data structure for the cipher operation af_alg_get_rsgl: creation of the RX SGL anchored in the request data structure The following changes to the implementation without affecting the functionality have been applied to synchronize slightly different code bases in algif_skcipher and algif_aead: The wakeup in af_alg_wait_for_data is triggered when either more data is received or the indicator that more data is to be expected is released. The first is triggered by user space, the second is triggered by the kernel upon finishing the processing of data (i.e. the kernel is ready for more). af_alg_sendmsg uses size_t in min_t calculation for obtaining len. Return code determination is consistent with algif_skcipher. The scope of the variable i is reduced to match algif_aead. The type of the variable i is switched from int to unsigned int to match algif_aead. af_alg_sendpage does not contain the superfluous err = 0 from aead_sendpage. af_alg_async_cb requires to store the number of output bytes in areq->outlen before the AIO callback is triggered. The POLLIN / POLLRDNORM is now set when either not more data is given or the kernel is supplied with data. This is consistent to the wakeup from sleep when the kernel waits for data. The request data structure is extended by the field last_rsgl which points to the last RX SGL list entry. This shall help recvmsg implementation to chain the RX SGL to other SG(L)s if needed. It is currently used by algif_aead which chains the tag SGL to the RX SGL during decryption. Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- include/crypto/if_alg.h | 170 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) (limited to 'include') diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index e2b9c6fe2714..75ec9c662268 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -20,6 +20,9 @@ #include #include +#include +#include + #define ALG_MAX_PAGES 16 struct crypto_async_request; @@ -68,6 +71,99 @@ struct af_alg_sgl { unsigned int npages; }; +/* TX SGL entry */ +struct af_alg_tsgl { + struct list_head list; + unsigned int cur; /* Last processed SG entry */ + struct scatterlist sg[0]; /* Array of SGs forming the SGL */ +}; + +#define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \ + sizeof(struct scatterlist) - 1) + +/* RX SGL entry */ +struct af_alg_rsgl { + struct af_alg_sgl sgl; + struct list_head list; + size_t sg_num_bytes; /* Bytes of data in that SGL */ +}; + +/** + * struct af_alg_async_req - definition of crypto request + * @iocb: IOCB for AIO operations + * @sk: Socket the request is associated with + * @first_rsgl: First RX SG + * @last_rsgl: Pointer to last RX SG + * @rsgl_list: Track RX SGs + * @tsgl: Private, per request TX SGL of buffers to process + * @tsgl_entries: Number of entries in priv. TX SGL + * @outlen: Number of output bytes generated by crypto op + * @areqlen: Length of this data structure + * @cra_u: Cipher request + */ +struct af_alg_async_req { + struct kiocb *iocb; + struct sock *sk; + + struct af_alg_rsgl first_rsgl; + struct af_alg_rsgl *last_rsgl; + struct list_head rsgl_list; + + struct scatterlist *tsgl; + unsigned int tsgl_entries; + + unsigned int outlen; + unsigned int areqlen; + + union { + struct aead_request aead_req; + struct skcipher_request skcipher_req; + } cra_u; + + /* req ctx trails this struct */ +}; + +/** + * struct af_alg_ctx - definition of the crypto context + * + * The crypto context tracks the input data during the lifetime of an AF_ALG + * socket. + * + * @tsgl_list: Link to TX SGL + * @iv: IV for cipher operation + * @aead_assoclen: Length of AAD for AEAD cipher operations + * @completion: Work queue for synchronous operation + * @used: TX bytes sent to kernel. This variable is used to + * ensure that user space cannot cause the kernel + * to allocate too much memory in sendmsg operation. + * @rcvused: Total RX bytes to be filled by kernel. This variable + * is used to ensure user space cannot cause the kernel + * to allocate too much memory in a recvmsg operation. + * @more: More data to be expected from user space? + * @merge: Shall new data from user space be merged into existing + * SG? + * @enc: Cryptographic operation to be performed when + * recvmsg is invoked. + * @len: Length of memory allocated for this data structure. + */ +struct af_alg_ctx { + struct list_head tsgl_list; + + void *iv; + size_t aead_assoclen; + + struct af_alg_completion completion; + + size_t used; + size_t rcvused; + + bool more; + bool merge; + bool enc; + + unsigned int len; +}; + int af_alg_register_type(const struct af_alg_type *type); int af_alg_unregister_type(const struct af_alg_type *type); @@ -94,4 +190,78 @@ static inline void af_alg_init_completion(struct af_alg_completion *completion) init_completion(&completion->completion); } +/** + * Size of available buffer for sending data from user space to kernel. + * + * @sk socket of connection to user space + * @return number of bytes still available + */ +static inline int af_alg_sndbuf(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + + return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - + ctx->used, 0); +} + +/** + * Can the send buffer still be written to? + * + * @sk socket of connection to user space + * @return true => writable, false => not writable + */ +static inline bool af_alg_writable(struct sock *sk) +{ + return PAGE_SIZE <= af_alg_sndbuf(sk); +} + +/** + * Size of available buffer used by kernel for the RX user space operation. + * + * @sk socket of connection to user space + * @return number of bytes still available + */ +static inline int af_alg_rcvbuf(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct af_alg_ctx *ctx = ask->private; + + return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - + ctx->rcvused, 0); +} + +/** + * Can the RX buffer still be written to? + * + * @sk socket of connection to user space + * @return true => writable, false => not writable + */ +static inline bool af_alg_readable(struct sock *sk) +{ + return PAGE_SIZE <= af_alg_rcvbuf(sk); +} + +int af_alg_alloc_tsgl(struct sock *sk); +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset); +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, + size_t dst_offset); +void af_alg_free_areq_sgls(struct af_alg_async_req *areq); +int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags); +void af_alg_wmem_wakeup(struct sock *sk); +int af_alg_wait_for_data(struct sock *sk, unsigned flags); +void af_alg_data_wakeup(struct sock *sk); +int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, + unsigned int ivsize); +ssize_t af_alg_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags); +void af_alg_async_cb(struct crypto_async_request *_req, int err); +unsigned int af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait); +struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, + unsigned int areqlen); +int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, + struct af_alg_async_req *areq, size_t maxsize, + size_t *outlen); + #endif /* _CRYPTO_IF_ALG_H */ -- cgit From 9047811b776ce09ba06623dd2a846cc501f0065b Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Wed, 28 Jun 2017 09:02:45 -0500 Subject: RDMA/core: Add wait/retry version of ibnl_unicast Add a wait/retry version of ibnl_unicast, ibnl_unicast_wait, and modify ibnl_unicast to not wait/retry. This eliminates the undesirable wait for future users of ibnl_unicast. Change Portmapper calls originating from kernel to user-space to use ibnl_unicast_wait and take advantage of the wait/retry logic in netlink_unicast. Signed-off-by: Mustafa Ismail Signed-off-by: Chien Tin Tung Signed-off-by: Leon Romanovsky --- include/rdma/rdma_netlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 348c102cb5f6..5b1466770917 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -63,6 +63,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid); +/** + * Send, with wait/1 retry, the supplied skb to a specific userspace PID. + * @skb: The netlink skb + * @nlh: Header of the netlink message to send + * @pid: Userspace netlink process ID + * Returns 0 on success or a negative error code. + */ +int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, + __u32 pid); + /** * Send the supplied skb to a netlink group. * @skb: The netlink skb -- cgit From 79bcd34ccfe009ad21d16100ae2aef9b378a512d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 1 Aug 2017 07:53:30 -0400 Subject: media: cec-funcs.h: cec_ops_report_features: set *dev_features to NULL gcc can get confused by this code and it thinks dev_features can be returned uninitialized. So initialize to NULL at the beginning to shut up the warning. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/cec-funcs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index c451eec42a83..270b251a3d9b 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -895,6 +895,7 @@ static inline void cec_ops_report_features(const struct cec_msg *msg, *cec_version = msg->msg[2]; *all_device_types = msg->msg[3]; *rc_profile = p; + *dev_features = NULL; while (p < &msg->msg[14] && (*p & CEC_OP_FEAT_EXT)) p++; if (!(*p & CEC_OP_FEAT_EXT)) { -- cgit From ee0c503eacfb97e1d443baf5a03939201d0244f5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 4 Aug 2017 06:41:51 -0400 Subject: media: media/cec.h: add CEC_CAP_DEFAULTS The CEC_CAP_LOG_ADDRS, CEC_CAP_TRANSMIT, CEC_CAP_PASSTHROUGH and CEC_CAP_RC capabilities are normally always present. Add a CEC_CAP_DEFAULTS define that ORs these four caps to simplify drivers. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 224a6e225c52..1bec7bde4792 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -31,6 +31,9 @@ #include #include +#define CEC_CAP_DEFAULTS (CEC_CAP_LOG_ADDRS | CEC_CAP_TRANSMIT | \ + CEC_CAP_PASSTHROUGH | CEC_CAP_RC) + /** * struct cec_devnode - cec device node * @dev: cec device -- cgit From af74030591c13331760fe66246338f64ded3f76f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 2 Aug 2017 13:14:49 -0400 Subject: media: drv-intf: saa7146: constify pci_device_id pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. So making 'pci_tbl' as const member of 'struct saa7146_extension'. Signed-off-by: Arvind Yadav Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/drv-intf/saa7146.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/drv-intf/saa7146.h b/include/media/drv-intf/saa7146.h index 96058a5a4acc..45294328614d 100644 --- a/include/media/drv-intf/saa7146.h +++ b/include/media/drv-intf/saa7146.h @@ -96,7 +96,7 @@ struct saa7146_extension supported devices, last entry 0xffff, 0xfff */ struct module *module; struct pci_driver driver; - struct pci_device_id *pci_tbl; + const struct pci_device_id *pci_tbl; /* extension functions */ int (*probe)(struct saa7146_dev *); -- cgit From cb87481ee89dbd6609e227afbf64900fb4e5c930 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 26 Jul 2017 22:46:27 +1000 Subject: kbuild: linker script do not match C names unless LD_DEAD_CODE_DATA_ELIMINATION is configured The .data and .bss sections were modified in the generic linker script to pull in sections named .data., which are generated by gcc with -ffunction-sections and -fdata-sections options. The problem with this pattern is it can also match section names that Linux defines explicitly, e.g., .data.unlikely. This can cause Linux sections to get moved into the wrong place. The way to avoid this is to use ".." separators for explicit section names (the dot character is valid in a section name but not a C identifier). However currently there are sections which don't follow this rule, so for now just disable the wild card by default. Example: http://marc.info/?l=linux-arm-kernel&m=150106824024221&w=2 Cc: # 4.9 Fixes: b67067f1176df ("kbuild: allow archs to select link dead code/data elimination") Signed-off-by: Nicholas Piggin Signed-off-by: Masahiro Yamada --- include/asm-generic/vmlinux.lds.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..9623d78f8494 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -59,6 +59,22 @@ /* Align . to a 8 byte boundary equals to maximum function alignment. */ #define ALIGN_FUNCTION() . = ALIGN(8) +/* + * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which + * generates .data.identifier sections, which need to be pulled in with + * .data. We don't want to pull in .data..other sections, which Linux + * has defined. Same for text and bss. + */ +#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#define DATA_MAIN .data +#define BSS_MAIN .bss +#endif + /* * Align to a 32 byte boundary equal to the * alignment gcc 4.5 uses for a struct @@ -198,12 +214,9 @@ /* * .data section - * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections generates - * .data.identifier which needs to be pulled in with .data, but don't want to - * pull in .data..stuff which has its own requirements. Same for bss. */ #define DATA_DATA \ - *(.data .data.[0-9a-zA-Z_]*) \ + *(DATA_MAIN) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data) \ @@ -434,16 +447,17 @@ VMLINUX_SYMBOL(__security_initcall_end) = .; \ } -/* .text section. Map to function alignment to avoid address changes +/* + * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map - * LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections generates - * .text.identifier which needs to be pulled in with .text , but some - * architectures define .text.foo which is not intended to be pulled in here. - * Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't have - * conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */ + * + * TEXT_MAIN here will match .text.fixup and .text.unlikely if dead + * code elimination is enabled, so these sections should be converted + * to use ".." first. + */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text .text.fixup .text.unlikely) \ + *(.text.hot TEXT_MAIN .text.fixup .text.unlikely) \ *(.ref.text) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ @@ -613,7 +627,7 @@ BSS_FIRST_SECTIONS \ *(.bss..page_aligned) \ *(.dynbss) \ - *(.bss .bss.[0-9a-zA-Z_]*) \ + *(BSS_MAIN) \ *(COMMON) \ } -- cgit From 6ec4e2514decd6fb4782a9364fa71d6244d05af4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Jul 2017 18:16:01 +0100 Subject: md/raid6: implement recovery using ARM NEON intrinsics Provide a NEON accelerated implementation of the recovery algorithm, which supersedes the default byte-by-byte one. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/raid/pq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 30f945329818..583cdd3d49ca 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -121,6 +121,7 @@ extern const struct raid6_recov_calls raid6_recov_ssse3; extern const struct raid6_recov_calls raid6_recov_avx2; extern const struct raid6_recov_calls raid6_recov_avx512; extern const struct raid6_recov_calls raid6_recov_s390xc; +extern const struct raid6_recov_calls raid6_recov_neon; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; -- cgit From d62e26b3ffd28f16ddae85a1babd0303a1a6dfb6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Jun 2017 21:55:08 -0600 Subject: block: pass in queue to inflight accounting No functional change in this patch, just in preparation for basing the inflight mechanism on the queue in question. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 +++++---- include/linux/genhd.h | 14 +++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b1cf4ba0902..9276788a9b24 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -463,10 +463,11 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -void generic_start_io_acct(int rw, unsigned long sectors, - struct hd_struct *part); -void generic_end_io_acct(int rw, struct hd_struct *part, - unsigned long start_time); +void generic_start_io_acct(struct request_queue *q, int rw, + unsigned long sectors, struct hd_struct *part); +void generic_end_io_acct(struct request_queue *q, int rw, + struct hd_struct *part, + unsigned long start_time); #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e619fae2f037..7f7427e00f9c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -362,23 +362,27 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct hd_struct *part, int rw) +static inline void part_inc_in_flight(struct request_queue *q, + struct hd_struct *part, int rw) { atomic_inc(&part->in_flight[rw]); if (part->partno) atomic_inc(&part_to_disk(part)->part0.in_flight[rw]); } -static inline void part_dec_in_flight(struct hd_struct *part, int rw) +static inline void part_dec_in_flight(struct request_queue *q, + struct hd_struct *part, int rw) { atomic_dec(&part->in_flight[rw]); if (part->partno) atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); } -static inline int part_in_flight(struct hd_struct *part) +static inline int part_in_flight(struct request_queue *q, + struct hd_struct *part) { - return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]); + return atomic_read(&part->in_flight[0]) + + atomic_read(&part->in_flight[1]); } static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) @@ -395,7 +399,7 @@ static inline void free_part_info(struct hd_struct *part) } /* block/blk-core.c */ -extern void part_round_stats(int cpu, struct hd_struct *part); +extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk); -- cgit From 0609e0efc5e15195ecf8c6d2f2e890d98760e337 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2017 17:49:47 -0600 Subject: block: make part_in_flight() take an array of two ints Instead of returning the count that matches the partition, pass in an array of two ints. Index 0 will be filled with the inflight count for the partition in question, and index 1 will filled with the root inflight count, if the partition passed in is not the root. This is in preparation for being able to calculate both in one go. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/genhd.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7f7427e00f9c..f2a3a26cdda1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -378,11 +378,18 @@ static inline void part_dec_in_flight(struct request_queue *q, atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); } -static inline int part_in_flight(struct request_queue *q, - struct hd_struct *part) +static inline void part_in_flight(struct request_queue *q, + struct hd_struct *part, + unsigned int inflight[2]) { - return atomic_read(&part->in_flight[0]) + + inflight[0] = atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]); + if (part->partno) { + part = &part_to_disk(part)->part0; + inflight[1] = atomic_read(&part->in_flight[0]) + + atomic_read(&part->in_flight[1]); + } else + inflight[1] = 0; } static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) -- cgit From f299b7c7a9dee64425e5965bd4f56dc024c1befc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2017 17:51:45 -0600 Subject: blk-mq: provide internal in-flight variant We don't have to inc/dec some counter, since we can just iterate the tags. That makes inc/dec a noop, but means we have to iterate busy tags to get an in-flight count. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/genhd.h | 35 ++++++----------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f2a3a26cdda1..ea652bfcd675 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -362,35 +362,12 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct request_queue *q, - struct hd_struct *part, int rw) -{ - atomic_inc(&part->in_flight[rw]); - if (part->partno) - atomic_inc(&part_to_disk(part)->part0.in_flight[rw]); -} - -static inline void part_dec_in_flight(struct request_queue *q, - struct hd_struct *part, int rw) -{ - atomic_dec(&part->in_flight[rw]); - if (part->partno) - atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); -} - -static inline void part_in_flight(struct request_queue *q, - struct hd_struct *part, - unsigned int inflight[2]) -{ - inflight[0] = atomic_read(&part->in_flight[0]) + - atomic_read(&part->in_flight[1]); - if (part->partno) { - part = &part_to_disk(part)->part0; - inflight[1] = atomic_read(&part->in_flight[0]) + - atomic_read(&part->in_flight[1]); - } else - inflight[1] = 0; -} +void part_in_flight(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]); +void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); +void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) { -- cgit From 0a4a060bb204c47825eb4f7c27f66fc7ee85d508 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 9 Aug 2017 19:09:44 -0400 Subject: sock: fix zerocopy_success regression with msg_zerocopy Do not use uarg->zerocopy outside msg_zerocopy. In other paths the field is not explicitly initialized and aliases another field. Those paths have only one reference so do not need this intermediate variable. Call uarg->callback directly. Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8c0708d2e5e6..7594e19bce62 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1273,8 +1273,13 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy) struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { - uarg->zerocopy = uarg->zerocopy && zerocopy; - sock_zerocopy_put(uarg); + if (uarg->callback == sock_zerocopy_callback) { + uarg->zerocopy = uarg->zerocopy && zerocopy; + sock_zerocopy_put(uarg); + } else { + uarg->callback(uarg, zerocopy); + } + skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG; } } -- cgit From 92b31a9af73b3a3fc801899335d6c47966351830 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 10 Aug 2017 01:39:55 +0200 Subject: bpf: add BPF_J{LT,LE,SLT,SLE} instructions Currently, eBPF only understands BPF_JGT (>), BPF_JGE (>=), BPF_JSGT (s>), BPF_JSGE (s>=) instructions, this means that particularly *JLT/*JLE counterparts involving immediates need to be rewritten from e.g. X < [IMM] by swapping arguments into [IMM] > X, meaning the immediate first is required to be loaded into a register Y := [IMM], such that then we can compare with Y > X. Note that the destination operand is always required to be a register. This has the downside of having unnecessarily increased register pressure, meaning complex program would need to spill other registers temporarily to stack in order to obtain an unused register for the [IMM]. Loading to registers will thus also affect state pruning since we need to account for that register use and potentially those registers that had to be spilled/filled again. As a consequence slightly more stack space might have been used due to spilling, and BPF programs are a bit longer due to extra code involving the register load and potentially required spill/fills. Thus, add BPF_JLT (<), BPF_JLE (<=), BPF_JSLT (s<), BPF_JSLE (s<=) counterparts to the eBPF instruction set. Modifying LLVM to remove the NegateCC() workaround in a PoC patch at [1] and allowing it to also emit the new instructions resulted in cilium's BPF programs that are injected into the fast-path to have a reduced program length in the range of 2-3% (e.g. accumulated main and tail call sections from one of the object file reduced from 4864 to 4729 insns), reduced complexity in the range of 10-30% (e.g. accumulated sections reduced in one of the cases from 116432 to 88428 insns), and reduced stack usage in the range of 1-5% (e.g. accumulated sections from one of the object files reduced from 824 to 784b). The modification for LLVM will be incorporated in a backwards compatible way. Plan is for LLVM to have i) a target specific option to offer a possibility to explicitly enable the extension by the user (as we have with -m target specific extensions today for various CPU insns), and ii) have the kernel checked for presence of the extensions and enable them transparently when the user is selecting more aggressive options such as -march=native in a bpf target context. (Other frontends generating BPF byte code, e.g. ply can probe the kernel directly for its code generation.) [1] https://github.com/borkmann/llvm/tree/bpf-insns Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1d06be1569b1..91da8371a2d0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ -- cgit From b97bac64a589d0158cf866e8995e831030f68f4f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:48 +0200 Subject: rtnetlink: make rtnl_register accept a flags parameter This change allows us to later indicate to rtnetlink core that certain doit functions should be called without acquiring rtnl_mutex. This change should have no effect, we simply replace the last (now unused) calcit argument with the new flag. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index abe6b733d473..ac32460a0adb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -7,12 +7,11 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); -typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *); int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, - rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func); + rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); int rtnl_unregister(int protocol, int msgtype); void rtnl_unregister_all(int protocol); -- cgit From 62256f98f244fbb1c7a10465e1ee412f209d8978 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Aug 2017 20:41:52 +0200 Subject: rtnetlink: add RTNL_FLAG_DOIT_UNLOCKED Allow callers to tell rtnetlink core that its doit callback should be invoked without holding rtnl mutex. Signed-off-by: Florian Westphal Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index ac32460a0adb..21837ca68ecc 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -8,6 +8,10 @@ typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *); typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); +enum rtnl_link_flags { + RTNL_FLAG_DOIT_UNLOCKED = 1, +}; + int __rtnl_register(int protocol, int msgtype, rtnl_doit_func, rtnl_dumpit_func, unsigned int flags); void rtnl_register(int protocol, int msgtype, -- cgit From 68277a2c9d32fd9090247a5c08aaf1353049c0b1 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:16 +0200 Subject: net-next: dsa: move struct dsa_device_ops to the global header file We need to access this struct from within the flow_dissector to fix dissection for packets coming in on DSA devices. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Signed-off-by: David S. Miller --- include/net/dsa.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index a4f66dbb4b7c..65d7804c6f69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -101,6 +101,13 @@ struct dsa_platform_data { struct packet_type; +struct dsa_device_ops { + struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); +}; + struct dsa_switch_tree { struct list_head list; -- cgit From 598a968011ffc4d624934995fe46d06bd450cdf4 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 9 Aug 2017 14:41:17 +0200 Subject: net-next: dsa: add flow_dissect callback to struct dsa_device_ops When the flow dissector first sees packets coming in on a DSA devices the 802.3 header wont be located where the code expects it to be as the tag is still present. Adding this new callback allows a DSA device to provide a new function that the flow_dissector can use to get the correct protocol and offset of the network header. Signed-off-by: Muciri Gatimu Signed-off-by: Shashidhar Lakkavalli Signed-off-by: John Crispin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 65d7804c6f69..7f46b521313e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -106,6 +106,8 @@ struct dsa_device_ops { struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); + int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, + int *offset); }; struct dsa_switch_tree { -- cgit From bfe334924ccd9f4a53f30240c03cf2f43f5b2df1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2017 19:39:30 +0200 Subject: perf/x86: Fix RDPMC vs. mm_struct tracking Vince reported the following rdpmc() testcase failure: > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled The problem is of course that exec() plays tricks with what is current->mm, only destroying the old mappings after having installed the new mm. Fix this confusion by passing along vma->vm_mm instead of relying on current->mm. Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Link: http://lkml.kernel.org/r/20170802173930.cstykcqefmqt7jau@hirez.programming.kicks-ass.net [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873fc59e4..b14095bcf4bb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ + void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are -- cgit From c9901724a2f14128ef6a57986babcbfbcf61a257 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2017 10:20:11 +0300 Subject: RDMA/netlink: Remove netlink clients infrastructure RDMA netlink has a complicated infrastructure for dynamically registering and de-registering netlink clients to the NETLINK_RDMA group. The complicated portion of this code is not widely used because 2 of the 3 current clients are statically compiled together with netlink.c. The infrastructure, therefore, is deemed overkill. Refactor the code to eliminate the dynamically added clients. Now all clients are pre-registered in a client array at compile time, and at run time they merely check-in with the infrastructure to pass their callback table for inclusion in the pre-sized client array. This also allows for future cleanups and removal of unneeded code in the iwcm* netlink handler. Signed-off-by: Leon Romanovsky Reviewed-by: Chien Tin Tung --- include/rdma/rdma_netlink.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 5b1466770917..aadf0ab963b2 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -11,23 +11,18 @@ struct ibnl_client_cbs { }; /** - * Add a a client to the list of IB netlink exporters. + * Register client in RDMA netlink. * @index: Index of the added client - * @nops: Number of supported ops by the added client. * @cb_table: A table for op->callback - * - * Returns 0 on success or a negative error code. */ -int ibnl_add_client(int index, int nops, - const struct ibnl_client_cbs cb_table[]); +void rdma_nl_register(unsigned int index, + const struct ibnl_client_cbs cb_table[]); /** * Remove a client from IB netlink. * @index: Index of the removed IB client. - * - * Returns 0 on success or a negative error code. */ -int ibnl_remove_client(int index); +void rdma_nl_unregister(unsigned int index); /** * Put a new message in a supplied skb. -- cgit From 64401b69b29164c5731018cc44fc9b144ac9c5ae Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 May 2017 11:29:56 +0300 Subject: RDMA/netlink: Remove redundant owner option for netlink callbacks Owner field is not needed to be set because netlink is part of ib_core which will be unloaded last after all other modules are unloaded. Signed-off-by: Leon Romanovsky --- include/rdma/rdma_netlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index aadf0ab963b2..c124d8e43fc8 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -7,7 +7,6 @@ struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); - struct module *module; }; /** -- cgit From 18f08dae19990f5fffde92e3a63e0d90cda0f1a8 Mon Sep 17 00:00:00 2001 From: Cheng Jian Date: Fri, 4 Aug 2017 17:19:37 +0800 Subject: sched/core: Remove unnecessary initialization init_idle_bootup_task() init_idle_bootup_task( ) is called in rest_init( ) to switch the scheduling class of the boot thread to the idle class. the function only sets: idle->sched_class = &idle_sched_class; which has been set in init_idle() called by sched_init(): /* * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; We've already set the boot thread to idle class in start_kernel()->sched_init()->init_idle() so it's unnecessary to set it again in start_kernel()->rest_init()->init_idle_bootup_task() Signed-off-by: Cheng Jian Signed-off-by: Xie XiuQi Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1501838377-109720-1-git-send-email-cj.chengjian@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index c97e5f096927..79a2a744648d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -30,7 +30,6 @@ extern int lockdep_tasklist_lock_is_held(void); extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); -- cgit From 74dc3384fc7983b78cc46ebb1824968a3db85eb1 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 6 Aug 2017 14:41:41 +1000 Subject: sched/debug: Use task_pid_nr_ns in /proc/$pid/sched It appears as though the addition of the PID namespace did not update the output code for /proc/*/sched, which resulted in it providing PIDs that were not self-consistent with the /proc mount. This additionally made it trivial to detect whether a process was inside &init_pid_ns from userspace, making container detection trivial: https://github.com/jessfraz/amicontained This leads to situations such as: % unshare -pmf % mount -t proc proc /proc % head -n1 /proc/1/sched head (10047, #threads: 1) Fix this by just using task_pid_nr_ns for the output of /proc/*/sched. All of the other uses of task_pid_nr in kernel/sched/debug.c are from a sysctl context and thus don't need to be namespaced. Signed-off-by: Aleksa Sarai Signed-off-by: Peter Zijlstra (Intel) Acked-by: Eric W. Biederman Cc: Jess Frazelle Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: cyphar@cyphar.com Link: http://lkml.kernel.org/r/20170806044141.5093-1-asarai@suse.com Signed-off-by: Ingo Molnar --- include/linux/sched/debug.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index e0eaee54c5a4..5d58d49e9f87 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -6,6 +6,7 @@ */ struct task_struct; +struct pid_namespace; extern void dump_cpu_task(int cpu); @@ -34,7 +35,8 @@ extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_SCHED_DEBUG struct seq_file; -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_show_task(struct task_struct *p, + struct pid_namespace *ns, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); #endif -- cgit From 20435d84e5f2041c64c792399ab6f2948a2c2252 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Mon, 7 Aug 2017 16:44:23 +0800 Subject: sched/debug: Intruduce task_state_to_char() helper function Now that we have more than one place to get the task state, intruduce the task_state_to_char() helper function to save some code. No functionality changed. Signed-off-by: Xie XiuQi Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1502095463-160172-3-git-send-email-xiexiuqi@huawei.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..c28b182c9833 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1229,6 +1229,19 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) return task_pgrp_nr_ns(tsk, &init_pid_ns); } +static inline char task_state_to_char(struct task_struct *task) +{ + const char stat_nam[] = TASK_STATE_TO_CHAR_STR; + unsigned long state = task->state; + + state = state ? __ffs(state) + 1 : 0; + + /* Make sure the string lines up properly with the number of task states: */ + BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + + return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'; +} + /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. -- cgit From e3a2b93dddad315f01a4b67faee738954c084072 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 16:00:19 +0300 Subject: RDMA/netlink: Add flag to consolidate common handling Add ability to provide flags to control RDMA netlink callbacks and convert addr.c and sa_query.c to be first users of such infrastructure. It allows to move their CAP_NET_ADMIN checks into netlink core. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c124d8e43fc8..6ea36ec45401 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -7,6 +7,12 @@ struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); + u8 flags; +}; + +enum rdma_nl_flags { + /* Require CAP_NET_ADMIN */ + RDMA_NL_ADMIN_PERM = 1 << 0, }; /** -- cgit From f00e64637061876ec7b6383b0bd80197c51e7312 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:35:20 +0300 Subject: RDMA/netlink: Rename and remove redundant parameter from ibnl_unicast* Netlink message header is not needed for unicast reply, hence remove it. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 6ea36ec45401..e7b0779385e9 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -56,22 +56,18 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, /** * Send the supplied skb to a specific userspace PID. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid); +int rdma_nl_unicast(struct sk_buff *skb, u32 pid); /** * Send, with wait/1 retry, the supplied skb to a specific userspace PID. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid); +int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. -- cgit From 4d7f693af0c9d0d6940ff36f5adca1adfa0e7e6e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:44:32 +0300 Subject: RDMA/netlink: Rename and remove redundant parameter from ibnl_multicast The pointer to netlink header was not used in the ibnl_multicast function, so let's remove it and simplify the function signature. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index e7b0779385e9..16a94c425938 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -72,12 +72,10 @@ int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @group: Netlink group ID * @flags: allocation flags * Returns 0 on success or a negative error code. */ -int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, - unsigned int group, gfp_t flags); +int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); #endif /* _RDMA_NETLINK_H */ -- cgit From ff61c425c1c563f1d688d59caf3b18a395cbf9c4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:51:16 +0300 Subject: RDMA/netlink: Simplify and rename ibnl_chk_listeners Make ibnl_chk_listeners function to be one line by removing unneeded comparison. Rename that function to be complaint to other functions in RDMA netlink. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 16a94c425938..348e0bbe0fc9 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -78,4 +78,10 @@ int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); */ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int rdma_nl_chk_listeners(unsigned int group); #endif /* _RDMA_NETLINK_H */ -- cgit From 3250b4dbd87aa08c21891cabfc6f6b48b36fd7e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Jun 2017 18:23:45 +0300 Subject: RDMA/netlink: Rename netlink callback struct The RDMA netlink client infrastructure was removed and made obsolete. The old infrastructure defined struct ibnl_client_cbs. Now that all uses of this have been updated to the new infrastructure, rename the struct to be compliant with the current stack naming standards: struct rdma_nl_cbs. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 348e0bbe0fc9..92f8832297ab 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -5,7 +5,7 @@ #include #include -struct ibnl_client_cbs { +struct rdma_nl_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); u8 flags; }; @@ -21,7 +21,7 @@ enum rdma_nl_flags { * @cb_table: A table for op->callback */ void rdma_nl_register(unsigned int index, - const struct ibnl_client_cbs cb_table[]); + const struct rdma_nl_cbs cb_table[]); /** * Remove a client from IB netlink. -- cgit From ecc82c53f9a4ce08ba7df626a4262c86841ced8f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 14:39:59 +0300 Subject: RDMA/core: Add and expose static device index This patch adds static device index in similar fashion to already available in netdev world (struct net->ifindex). In downstream patches, the RDMA nelink will use this idx-to-ib_device conversion, so as part of this commit, we are exposing the translation function to be visible for IB/core users. Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1082b4c81b2c..3391df5fdc9c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2298,6 +2298,8 @@ struct ib_device { struct rdmacg_device cg_device; #endif + u32 index; + /** * The following mandatory functions are used only at device * registration. Keep functions such as these at the end of this -- cgit From 1830ba21b9a475cfc6159e6cfe532c75fe7682a4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Jun 2017 12:46:33 +0300 Subject: RDMA/netlink: Add and implement doit netlink callback The .doit callback is used by netlink core to differentiate between get and set operations. Common convention is to use that call for command operations like (SET, ADD, e.t.c.) and/or access without NLF_M_DUMP flag. This commit adds proper declaration and implementation to RDMA netlink. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/rdma/rdma_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 92f8832297ab..e25bf1988846 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include struct rdma_nl_cbs { + int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); u8 flags; }; -- cgit From 1a6e7c31d71db34d1b9bc3acc87eaea6c2ecc997 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 07:55:53 +0300 Subject: RDMA/netlink: Add netlink device definitions to UAPI Introduce new defines to rdma_netlink.h, so the RDMA configuration tool will be able to communicate with RDMA subsystem by using the shared defines. The addition of new client (NLDEV) revealed the fact that we exposed by mistake the RDMA_NL_I40IW define which is not backed by any RDMA netlink by now and it won't be exposed in the future too. So this patch reuses the value and deletes the old defines. The NLDEV operates with objects. The struct ib_device has two straightforward objects: device itself and ports of that device. This brings us to propose the following commands to work on those objects: * RDMA_NLDEV_CMD_{GET,SET,NEW,DEL} - works on ib_device itself * RDMA_NLDEV_CMD_PORT_{GET,SET,NEW,DEL} - works on ports of specific ib_device Those commands receive/return the device index (RDMA_NLDEV_ATTR_DEV_INDEX) and port index (RDMA_NLDEV_ATTR_PORT_INDEX). For device object accesses, the RDMA_NLDEV_ATTR_PORT_INDEX will return the maximum number of ports for specific ib_device and for port access the actual port index. The port index starts from 1 to follow RDMA/core internal semantics and the sysfs exposed knobs. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/uapi/rdma/rdma_netlink.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 02fe8390c18f..a44229fa5eca 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -8,7 +8,7 @@ enum { RDMA_NL_IWCM, RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ - RDMA_NL_I40IW, + RDMA_NL_NLDEV, /* RDMA device interface */ RDMA_NL_NUM_CLIENTS }; @@ -222,4 +222,41 @@ struct rdma_nla_ls_gid { __u8 gid[16]; }; +enum rdma_nldev_command { + RDMA_NLDEV_CMD_UNSPEC, + + RDMA_NLDEV_CMD_GET, /* can dump */ + RDMA_NLDEV_CMD_SET, + RDMA_NLDEV_CMD_NEW, + RDMA_NLDEV_CMD_DEL, + + RDMA_NLDEV_CMD_PORT_GET, /* can dump */ + RDMA_NLDEV_CMD_PORT_SET, + RDMA_NLDEV_CMD_PORT_NEW, + RDMA_NLDEV_CMD_PORT_DEL, + + RDMA_NLDEV_NUM_OPS +}; + +enum rdma_nldev_attr { + /* don't change the order or add anything between, this is ABI! */ + RDMA_NLDEV_ATTR_UNSPEC, + + /* Identifier for ib_device */ + RDMA_NLDEV_ATTR_DEV_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_DEV_NAME, /* string */ + /* + * Device index together with port index are identifiers + * for port/link properties. + * + * For RDMA_NLDEV_CMD_GET commamnd, port index will return number + * of available ports in ib_device, while for port specific operations, + * it will be real port index as it appears in sysfs. Port index follows + * sysfs notation and starts from 1 for the first port. + */ + RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_MAX +}; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From ac50525374315b9b609747f83b07f8dccb06b722 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 14:47:08 +0300 Subject: RDMA/netlink: Expose device and port capability masks The port capability mask is exposed to user space via sysfs interface, while device capabilities are available for verbs only. This patch provides those capabilities through netlink interface. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- include/uapi/rdma/rdma_netlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index a44229fa5eca..90de11db6580 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -257,6 +257,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + /* + * Device and port capabilities + */ + RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 9abb0d1bbd9529c574eacd8586e2bf68d17966cd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:49:53 +0300 Subject: RDMA: Simplify get firmware interface There is a need to forward FW version to user space application through RDMA netlink. In order to make it safe, there is need to declare nla_policy and limit the size of FW string. The new define IB_FW_VERSION_NAME_MAX will limit the size of FW version string. That define was chosen to be equal to ETHTOOL_FWVERS_LEN, because many drivers anyway are limited by that value indirectly. The introduction of this define allows us to remove the string size from get_fw_str function signature. Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3391df5fdc9c..e0e87a1f66fb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -64,6 +64,8 @@ #include #include +#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN + extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -2307,7 +2309,7 @@ struct ib_device { * in fast paths. */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); - void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + void (*get_dev_fw_str)(struct ib_device *, char *str); }; struct ib_client { @@ -2343,7 +2345,7 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); -void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); +void ib_get_device_fw_str(struct ib_device *device, char *str); int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, -- cgit From 8621a7e3c1c22e18385c9ced1647363884ea2aa1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:58:59 +0300 Subject: RDMA/netlink: Export FW version Add FW version to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 90de11db6580..5159858730b0 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -262,6 +262,10 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + /* + * FW version + */ + RDMA_NLDEV_ATTR_FW_VERSION, /* string */ RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 1aaff896ca6b968a639e3e1e72ba6146ba332501 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 14:01:37 +0300 Subject: RDMA/netlink: Export node_guid and sys_image_guid Add Node GUID and system image GUID to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5159858730b0..fe3a7429e7a1 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -266,6 +266,19 @@ enum rdma_nldev_attr { * FW version */ RDMA_NLDEV_ATTR_FW_VERSION, /* string */ + + /* + * Node GUID (in host byte order) associated with the RDMA device. + */ + RDMA_NLDEV_ATTR_NODE_GUID, /* u64 */ + + /* + * System image GUID (in host byte order) associated with + * this RDMA device and other devices which are part of a + * single system. + */ + RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 12026fbba6af2fc53c3c6cf88bdfc6561986ba82 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:05:14 +0300 Subject: RDMA/netlink: Advertise IB subnet prefix Add IB subnet prefix to the port properties exported by RDMA netlink. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index fe3a7429e7a1..481003182a35 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -279,6 +279,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + /* + * Subnet prefix (in host byte order) + */ + RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 80a06dd36f79de7007f21f5cbe42181a4e5c7d6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:38:36 +0300 Subject: RDMA/netink: Export lids and sm_lids According to the IB specification, the LID and SM_LID are 16-bit wide, but to support OmniPath users, export it as 32-bit value from the beginning. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 481003182a35..7d5caaf54126 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -284,6 +284,14 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + /* + * Local Identifier (LID), + * According to IB specification, It is 16-bit address assigned + * by the Subnet Manager. Extended to be 32-bit for OmniPath users. + */ + RDMA_NLDEV_ATTR_LID, /* u32 */ + RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 34840fea112d36507c19dc6052b8c6d88bdd9c16 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:49:30 +0300 Subject: RDMA/netlink: Export LID mask control (LMC) Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 7d5caaf54126..035706e6b016 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -292,6 +292,11 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_LID, /* u32 */ RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + /* + * LID mask control (LMC) + */ + RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 5654e49db0b2d87c12b6e120b6a830abe3d3921b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 13:12:45 +0300 Subject: RDMA/netlink: Provide port state and physical link state Add port state and physical link state to the users of RDMA netlink. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 035706e6b016..c488c3cf361b 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,6 +297,9 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ + RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 1bb77b8c1d57149ed0aa6825255ead80ae584034 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 16:01:29 +0300 Subject: RDMA/netlink: Export node_type Add ability to get node_type for RDAM netlink users. Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index c488c3cf361b..861440a87e7c 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -300,6 +300,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit From 9d664c0aec3bfdb77fcf7de61cfe1febbecdd389 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Jun 2017 13:05:06 +0200 Subject: locking/atomic: Fix atomic_set_release() for 'funny' architectures Those architectures that have a special atomic_set implementation also need a special atomic_set_release(), because for the very same reason WRITE_ONCE() is broken for them, smp_store_release() is too. The vast majority is architectures that have spinlock hash based atomic implementation except hexagon which seems to have a hardware 'feature'. The spinlock based atomics should be SC, that is, none of them appear to place extra barriers in atomic_cmpxchg() or any of the other SC atomic primitives and therefore seem to rely on their spinlock implementation being SC (I did not fully validate all that). Therefore, the normal atomic_set() is SC and can be used at atomic_set_release(). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf [for tile] Cc: Boqun Feng Cc: Linus Torvalds Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: davem@davemloft.net Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: rkuo@codeaurora.org Cc: vgupta@synopsys.com Link: http://lkml.kernel.org/r/20170609110506.yod47flaav3wgoj5@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/asm-generic/atomic64.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index dad68bf46c77..8d28eb010d0d 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -21,6 +21,8 @@ typedef struct { extern long long atomic64_read(const atomic64_t *v); extern void atomic64_set(atomic64_t *v, long long i); +#define atomic64_set_release(v, i) atomic64_set((v), (i)) + #define ATOMIC64_OP(op) \ extern void atomic64_##op(long long a, atomic64_t *v); -- cgit From 0aa1125fa8bc5e5f98317156728fa4d0293561a5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Jun 2017 21:02:12 +0300 Subject: locking/rwsem-spinlock: Add killable versions of __down_read() Rename __down_read() in __down_read_common() and teach it to abort waiting in case of pending signals and killable state argument passed. Note, that we shouldn't wake anybody up in EINTR path, as: We check for signal_pending_state() after (!waiter.task) test and under spinlock. So, current task wasn't able to be woken up. It may be in two cases: a writer is owner of the sem, or a writer is a first waiter of the sem. If a writer is owner of the sem, no one else may work with it in parallel. It will wake somebody, when it call up_write() or downgrade_write(). If a writer is the first waiter, it will be woken up, when the last active reader releases the sem, and sem->count became 0. Also note, that set_current_state() may be moved down to schedule() (after !waiter.task check), as all assignments in this type of semaphore (including wake_up), occur under spinlock, so we can't miss anything. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: avagin@virtuozzo.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: gorcunov@virtuozzo.com Cc: heiko.carstens@de.ibm.com Cc: hpa@zytor.com Cc: ink@jurassic.park.msu.ru Cc: mattst88@gmail.com Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/149789533283.9059.9829416940494747182.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/rwsem-spinlock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index ae0528b834cd..e784761a4443 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -32,6 +32,7 @@ struct rw_semaphore { #define RWSEM_UNLOCKED_VALUE 0x00000000 extern void __down_read(struct rw_semaphore *sem); +extern int __must_check __down_read_killable(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); extern int __must_check __down_write_killable(struct rw_semaphore *sem); -- cgit From 83ced169d9a01f22eb39f1fcc1f89ad9d223238f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 19 Jun 2017 21:02:26 +0300 Subject: locking/rwsem-xadd: Add killable versions of rwsem_down_read_failed() Rename rwsem_down_read_failed() in __rwsem_down_read_failed_common() and teach it to abort waiting in case of pending signals and killable state argument passed. Note, that we shouldn't wake anybody up in EINTR path, as: We check for (waiter.task) under spinlock before we go to out_nolock path. Current task wasn't able to be woken up, so there are a writer, owning the sem, or a writer, which is the first waiter. In the both cases we shouldn't wake anybody. If there is a writer, owning the sem, and we were the only waiter, remove RWSEM_WAITING_BIAS, as there are no waiters anymore. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: avagin@virtuozzo.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: gorcunov@virtuozzo.com Cc: heiko.carstens@de.ibm.com Cc: hpa@zytor.com Cc: ink@jurassic.park.msu.ru Cc: mattst88@gmail.com Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/149789534632.9059.2901382369609922565.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index dd1d14250340..0ad7318ff299 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -44,6 +44,7 @@ struct rw_semaphore { }; extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -- cgit From 1dbb6704de91b169a58d0c8221624afd6a95cfc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Aug 2017 17:24:04 +0200 Subject: jump_label: Fix concurrent static_key_enable/disable() static_key_enable/disable are trying to cap the static key count to 0/1. However, their use of key->enabled is outside jump_label_lock so they do not really ensure that. Rewrite them to do a quick check for an already enabled (respectively, already disabled), and then recheck under the jump label lock. Unlike static_key_slow_inc/dec, a failed check under the jump label lock does not modify key->enabled. Signed-off-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Cc: Eric Dumazet Cc: Jason Baron Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1501601046-35683-2-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 2afd74b9d844..740a42ea7f7f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -234,22 +234,24 @@ static inline int jump_label_apply_nops(struct module *mod) static inline void static_key_enable(struct static_key *key) { - int count = static_key_count(key); - - WARN_ON_ONCE(count < 0 || count > 1); + STATIC_KEY_CHECK_USE(); - if (!count) - static_key_slow_inc(key); + if (atomic_read(&key->enabled) != 0) { + WARN_ON_ONCE(atomic_read(&key->enabled) != 1); + return; + } + atomic_set(&key->enabled, 1); } static inline void static_key_disable(struct static_key *key) { - int count = static_key_count(key); - - WARN_ON_ONCE(count < 0 || count > 1); + STATIC_KEY_CHECK_USE(); - if (count) - static_key_slow_dec(key); + if (atomic_read(&key->enabled) != 1) { + WARN_ON_ONCE(atomic_read(&key->enabled) != 0); + return; + } + atomic_set(&key->enabled, 0); } #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } -- cgit From be040bea9085a9c2b1700c9e60888777baeb96d5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Aug 2017 17:24:06 +0200 Subject: cpuset: Make nr_cpusets private Any use of key->enabled (that is static_key_enabled and static_key_count) outside jump_label_lock should handle its own serialization. In the case of cpusets_enabled_key, the key is always incremented/decremented under cpuset_mutex, and hence the same rule applies to nr_cpusets. The rule *is* respected currently, but the mutex is static so nr_cpusets should be static too. Signed-off-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Acked-by: Zefan Li Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1501601046-35683-4-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 898cfe2eeb42..e74655d941b7 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void) return static_branch_unlikely(&cpusets_enabled_key); } -static inline int nr_cpusets(void) -{ - /* jump label reference count + the top-level cpuset */ - return static_key_count(&cpusets_enabled_key.key) + 1; -} - static inline void cpuset_inc(void) { static_branch_inc(&cpusets_pre_enable_key); -- cgit From 5a40527f8f0798553764fc8db4111d7d9c33ea51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Aug 2017 09:02:56 +0100 Subject: jump_label: Provide hotplug context variants As using the normal static key API under the hotplug lock is pretty much impossible, let's provide a variant of some of them that require the hotplug lock to have already been taken. These function are only meant to be used in CPU hotplug callbacks. Signed-off-by: Marc Zyngier Signed-off-by: Peter Zijlstra (Intel) Cc: Leo Yan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20170801080257.5056-4-marc.zyngier@arm.com Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 740a42ea7f7f..cd5861651b17 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); extern void static_key_disable(struct static_key *key); +extern void static_key_enable_cpuslocked(struct static_key *key); +extern void static_key_disable_cpuslocked(struct static_key *key); /* * We should be using ATOMIC_INIT() for initializing .enabled, but @@ -254,6 +256,9 @@ static inline void static_key_disable(struct static_key *key) atomic_set(&key->enabled, 0); } +#define static_key_enable_cpuslocked(k) static_key_enable((k)) +#define static_key_disable_cpuslocked(k) static_key_disable((k)) + #define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } #define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } @@ -415,8 +420,10 @@ extern bool ____wrong_branch_error(void); * Normal usage; boolean enable/disable. */ -#define static_branch_enable(x) static_key_enable(&(x)->key) -#define static_branch_disable(x) static_key_disable(&(x)->key) +#define static_branch_enable(x) static_key_enable(&(x)->key) +#define static_branch_disable(x) static_key_disable(&(x)->key) +#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key) +#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key) #endif /* __ASSEMBLY__ */ -- cgit From 8b1b436dd1ccc8a1770af6e56eec047ad4920659 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Jun 2017 18:05:07 +0200 Subject: mm, locking: Rework {set,clear,mm}_tlb_flush_pending() Commit: af2c1401e6f9 ("mm: numa: guarantee that tlb_flush_pending updates are visible before page table updates") added smp_mb__before_spinlock() to set_tlb_flush_pending(). I think we can solve the same problem without this barrier. If instead we mandate that mm_tlb_flush_pending() is used while holding the PTL we're guaranteed to observe prior set_tlb_flush_pending() instances. For this to work we need to rework migrate_misplaced_transhuge_page() a little and move the test up into do_huge_pmd_numa_page(). NOTE: this relies on flush_tlb_range() to guarantee: (1) it ensures that prior page table updates are visible to the page table walker and (2) it ensures that subsequent memory accesses are only made visible after the invalidation has completed This is required for architectures that implement TRANSPARENT_HUGEPAGE (arc, arm, arm64, mips, powerpc, s390, sparc, x86) or otherwise use mm_tlb_flush_pending() in their page-table operations (arm, arm64, x86). This appears true for: - arm (DSB ISB before and after), - arm64 (DSB ISHST before, and DSB ISH after), - powerpc (PTESYNC before and after), - s390 and x86 TLB invalidate are serializing instructions But I failed to understand the situation for: - arc, mips, sparc Now SPARC64 is a wee bit special in that flush_tlb_range() is a no-op and it flushes the TLBs using arch_{enter,leave}_lazy_mmu_mode() inside the PTL. It still needs to guarantee the PTL unlock happens _after_ the invalidate completes. Vineet, Ralf and Dave could you guys please have a look? Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: David S. Miller Cc: Heiko Carstens Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Rik van Riel Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f384bb62d8e..36ea3cf7d85e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -531,23 +531,44 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) */ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { - barrier(); + /* + * Must be called with PTL held; such that our PTL acquire will have + * observed the store from set_tlb_flush_pending(). + */ return mm->tlb_flush_pending; } static inline void set_tlb_flush_pending(struct mm_struct *mm) { mm->tlb_flush_pending = true; - /* - * Guarantee that the tlb_flush_pending store does not leak into the - * critical section updating the page tables + * The only time this value is relevant is when there are indeed pages + * to flush. And we'll only flush pages after changing them, which + * requires the PTL. + * + * So the ordering here is: + * + * mm->tlb_flush_pending = true; + * spin_lock(&ptl); + * ... + * set_pte_at(); + * spin_unlock(&ptl); + * + * spin_lock(&ptl) + * mm_tlb_flush_pending(); + * .... + * spin_unlock(&ptl); + * + * flush_tlb_range(); + * mm->tlb_flush_pending = false; + * + * So the =true store is constrained by the PTL unlock, and the =false + * store is constrained by the TLB invalidate. */ - smp_mb__before_spinlock(); } /* Clearing is done after a TLB flush, which also provides a barrier. */ static inline void clear_tlb_flush_pending(struct mm_struct *mm) { - barrier(); + /* see set_tlb_flush_pending */ mm->tlb_flush_pending = false; } #else -- cgit From d89e588ca4081615216cc25f2489b0281ac0bfe9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Sep 2016 11:37:53 +0200 Subject: locking: Introduce smp_mb__after_spinlock() Since its inception, our understanding of ACQUIRE, esp. as applied to spinlocks, has changed somewhat. Also, I wonder if, with a simple change, we cannot make it provide more. The problem with the comment is that the STORE done by spin_lock isn't itself ordered by the ACQUIRE, and therefore a later LOAD can pass over it and cross with any prior STORE, rendering the default WMB insufficient (pointed out by Alan). Now, this is only really a problem on PowerPC and ARM64, both of which already defined smp_mb__before_spinlock() as a smp_mb(). At the same time, we can get a much stronger construct if we place that same barrier _inside_ the spin_lock(). In that case we upgrade the RCpc spinlock to an RCsc. That would make all schedule() calls fully transitive against one another. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Alan Stern Cc: Benjamin Herrenschmidt Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Oleg Nesterov Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 3 +++ include/linux/spinlock.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index c56be7410130..40d6bfec0e0d 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -38,6 +38,9 @@ * Besides, if an arch has a special barrier for acquire/release, it could * implement its own __atomic_op_* and use the same framework for building * variants + * + * If an architecture overrides __atomic_op_acquire() it will probably want + * to define smp_mb__after_spinlock(). */ #ifndef __atomic_op_acquire #define __atomic_op_acquire(op, args...) \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d9510e8522d4..840281095933 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,6 +130,42 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif +/* + * This barrier must provide two things: + * + * - it must guarantee a STORE before the spin_lock() is ordered against a + * LOAD after it, see the comments at its two usage sites. + * + * - it must ensure the critical section is RCsc. + * + * The latter is important for cases where we observe values written by other + * CPUs in spin-loops, without barriers, while being subject to scheduling. + * + * CPU0 CPU1 CPU2 + * + * for (;;) { + * if (READ_ONCE(X)) + * break; + * } + * X=1 + * + * + * r = X; + * + * without transitivity it could be that CPU1 observes X!=0 breaks the loop, + * we get migrated and CPU2 sees X==0. + * + * Since most load-store architectures implement ACQUIRE with an smp_mb() after + * the LL/SC loop, they need no further barriers. Similarly all our TSO + * architectures imply an smp_mb() for each atomic instruction and equally don't + * need more. + * + * Architectures that can implement ACQUIRE better need to take care. + */ +#ifndef smp_mb__after_spinlock +#define smp_mb__after_spinlock() do { } while (0) +#endif + /** * raw_spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. -- cgit From a9668cd6ee288c4838bc668880ac085be551cac2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Jun 2017 17:51:27 +0200 Subject: locking: Remove smp_mb__before_spinlock() Now that there are no users of smp_mb__before_spinlock() left, remove it entirely. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 840281095933..4e8cce19b507 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -117,19 +117,6 @@ do { \ #endif /*arch_spin_is_contended*/ #endif -/* - * Despite its name it doesn't necessarily has to be a full barrier. - * It should only guarantee that a STORE before the critical section - * can not be reordered with LOADs and STOREs inside this section. - * spin_lock() is the one-way barrier, this LOAD can not escape out - * of the region. So the default implementation simply ensures that - * a STORE can not move into the critical section, smp_wmb() should - * serialize it with another STORE done by spin_lock(). - */ -#ifndef smp_mb__before_spinlock -#define smp_mb__before_spinlock() smp_wmb() -#endif - /* * This barrier must provide two things: * -- cgit From d92a8cfcb37ecd1315269dab741f073b63b3a8b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Mar 2017 10:13:38 +0100 Subject: locking/lockdep: Rework FS_RECLAIM annotation A while ago someone, and I cannot find the email just now, asked if we could not implement the RECLAIM_FS inversion stuff with a 'fake' lock like we use for other things like workqueues etc. I think this should be possible which allows reducing the 'irq' states and will reduce the amount of __bfs() lookups we do. Removing the 1 IRQ state results in 4 less __bfs() walks per dependency, improving lockdep performance. And by moving this annotation out of the lockdep code it becomes easier for the mm people to extend. Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Nikolay Borisov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: iamjoonsoo.kim@lge.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 11 ++--------- include/linux/sched.h | 1 - include/linux/sched/mm.h | 8 ++++++++ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fffe49f188e6..0a4c02c2d7a2 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -29,7 +29,7 @@ extern int lock_stat; * We'd rather not expose kernel/lockdep_states.h this wide, but we do need * the total number of states... :-( */ -#define XXX_LOCK_USAGE_STATES (1+3*4) +#define XXX_LOCK_USAGE_STATES (1+2*4) /* * NR_LOCKDEP_CACHING_CLASSES ... Number of classes @@ -363,10 +363,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); -extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); -extern void lockdep_clear_current_reclaim_state(void); -extern void lockdep_trace_alloc(gfp_t mask); - struct pin_cookie { unsigned int val; }; #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } @@ -375,7 +371,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); -# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, +# define INIT_LOCKDEP .lockdep_recursion = 0, #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) @@ -416,9 +412,6 @@ static inline void lockdep_on(void) # define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) -# define lockdep_set_current_reclaim_state(g) do { } while (0) -# define lockdep_clear_current_reclaim_state() do { } while (0) -# define lockdep_trace_alloc(g) do { } while (0) # define lockdep_info() do { } while (0) # define lockdep_init_map(lock, name, key, sub) \ do { (void)(name); (void)(key); } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..57db70ec70d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -846,7 +846,6 @@ struct task_struct { int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; #endif #ifdef CONFIG_UBSAN diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2b24a6974847..2b0a281f9d26 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags) return flags; } +#ifdef CONFIG_LOCKDEP +extern void fs_reclaim_acquire(gfp_t gfp_mask); +extern void fs_reclaim_release(gfp_t gfp_mask); +#else +static inline void fs_reclaim_acquire(gfp_t gfp_mask) { } +static inline void fs_reclaim_release(gfp_t gfp_mask) { } +#endif + static inline unsigned int memalloc_noio_save(void) { unsigned int flags = current->flags & PF_MEMALLOC_NOIO; -- cgit From b09be676e0ff25bd6d2e7637e26d349f9109ad75 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:52 +0900 Subject: locking/lockdep: Implement the 'crossrelease' feature Lockdep is a runtime locking correctness validator that detects and reports a deadlock or its possibility by checking dependencies between locks. It's useful since it does not report just an actual deadlock but also the possibility of a deadlock that has not actually happened yet. That enables problems to be fixed before they affect real systems. However, this facility is only applicable to typical locks, such as spinlocks and mutexes, which are normally released within the context in which they were acquired. However, synchronization primitives like page locks or completions, which are allowed to be released in any context, also create dependencies and can cause a deadlock. So lockdep should track these locks to do a better job. The 'crossrelease' implementation makes these primitives also be tracked. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-6-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 24 +++++++++-- include/linux/lockdep.h | 110 ++++++++++++++++++++++++++++++++++++++++++++--- include/linux/sched.h | 8 ++++ 3 files changed, 131 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5dd1272d1ab2..5fdd93bb9300 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -23,10 +23,26 @@ # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) -# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) -# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) +# define trace_hardirq_enter() \ +do { \ + current->hardirq_context++; \ + crossrelease_hist_start(XHLOCK_HARD); \ +} while (0) +# define trace_hardirq_exit() \ +do { \ + current->hardirq_context--; \ + crossrelease_hist_end(XHLOCK_HARD); \ +} while (0) +# define lockdep_softirq_enter() \ +do { \ + current->softirq_context++; \ + crossrelease_hist_start(XHLOCK_SOFT); \ +} while (0) +# define lockdep_softirq_exit() \ +do { \ + current->softirq_context--; \ + crossrelease_hist_end(XHLOCK_SOFT); \ +} while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0a4c02c2d7a2..e1e0fcd99613 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -155,6 +155,12 @@ struct lockdep_map { int cpu; unsigned long ip; #endif +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + /* + * Whether it's a crosslock. + */ + int cross; +#endif }; static inline void lockdep_copy_map(struct lockdep_map *to, @@ -258,8 +264,62 @@ struct held_lock { unsigned int hardirqs_off:1; unsigned int references:12; /* 32 bits */ unsigned int pin_count; +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + /* + * Generation id. + * + * A value of cross_gen_id will be stored when holding this, + * which is globally increased whenever each crosslock is held. + */ + unsigned int gen_id; +#endif +}; + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#define MAX_XHLOCK_TRACE_ENTRIES 5 + +/* + * This is for keeping locks waiting for commit so that true dependencies + * can be added at commit step. + */ +struct hist_lock { + /* + * Seperate stack_trace data. This will be used at commit step. + */ + struct stack_trace trace; + unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; + + /* + * Seperate hlock instance. This will be used at commit step. + * + * TODO: Use a smaller data structure containing only necessary + * data. However, we should make lockdep code able to handle the + * smaller one first. + */ + struct held_lock hlock; +}; + +/* + * To initialize a lock as crosslock, lockdep_init_map_crosslock() should + * be called instead of lockdep_init_map(). + */ +struct cross_lock { + /* + * Seperate hlock instance. This will be used at commit step. + * + * TODO: Use a smaller data structure containing only necessary + * data. However, we should make lockdep code able to handle the + * smaller one first. + */ + struct held_lock hlock; }; +struct lockdep_map_cross { + struct lockdep_map map; + struct cross_lock xlock; +}; +#endif + /* * Initialization, self-test and debugging-output methods: */ @@ -281,13 +341,6 @@ extern void lockdep_on(void); extern void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass); -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - /* * Reinitialize a lock key - for cases where there is special locking or * special initialization of locks so that the validator gets the scope @@ -460,6 +513,49 @@ struct pin_cookie { }; #endif /* !LOCKDEP */ +enum xhlock_context_t { + XHLOCK_HARD, + XHLOCK_SOFT, + XHLOCK_PROC, + XHLOCK_CTX_NR, +}; + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +extern void lockdep_init_map_crosslock(struct lockdep_map *lock, + const char *name, + struct lock_class_key *key, + int subclass); +extern void lock_commit_crosslock(struct lockdep_map *lock); + +#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ + { .map.name = (_name), .map.key = (void *)(_key), \ + .map.cross = 1, } + +/* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), .cross = 0, } + +extern void crossrelease_hist_start(enum xhlock_context_t c); +extern void crossrelease_hist_end(enum xhlock_context_t c); +extern void lockdep_init_task(struct task_struct *task); +extern void lockdep_free_task(struct task_struct *task); +#else +/* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + +static inline void crossrelease_hist_start(enum xhlock_context_t c) {} +static inline void crossrelease_hist_end(enum xhlock_context_t c) {} +static inline void lockdep_init_task(struct task_struct *task) {} +static inline void lockdep_free_task(struct task_struct *task) {} +#endif + #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); diff --git a/include/linux/sched.h b/include/linux/sched.h index 57db70ec70d0..5235fba537fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -848,6 +848,14 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#define MAX_XHLOCKS_NR 64UL + struct hist_lock *xhlocks; /* Crossrelease history locks */ + unsigned int xhlock_idx; + /* For restoring at history boundaries */ + unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; +#endif + #ifdef CONFIG_UBSAN unsigned int in_ubsan; #endif -- cgit From 23f873d8f9526ed7e49a1a02a45f8afb9ae5fb84 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:53 +0900 Subject: locking/lockdep: Detect and handle hist_lock ring buffer overwrite The ring buffer can be overwritten by hardirq/softirq/work contexts. That cases must be considered on rollback or commit. For example, |<------ hist_lock ring buffer size ----->| ppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii wrapped > iiiiiiiiiiiiiiiiiiiiiii.................... where 'p' represents an acquisition in process context, 'i' represents an acquisition in irq context. On irq exit, crossrelease tries to rollback idx to original position, but it should not because the entry already has been invalid by overwriting 'i'. Avoid rollback or commit for entries overwritten. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-7-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 20 ++++++++++++++++++++ include/linux/sched.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index e1e0fcd99613..c75eedd55af5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -283,6 +283,26 @@ struct held_lock { * can be added at commit step. */ struct hist_lock { + /* + * Id for each entry in the ring buffer. This is used to + * decide whether the ring buffer was overwritten or not. + * + * For example, + * + * |<----------- hist_lock ring buffer size ------->| + * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii + * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii....................... + * + * where 'p' represents an acquisition in process + * context, 'i' represents an acquisition in irq + * context. + * + * In this example, the ring buffer was overwritten by + * acquisitions in irq context, that should be detected on + * rollback or commit. + */ + unsigned int hist_id; + /* * Seperate stack_trace data. This will be used at commit step. */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5235fba537fc..772c5f643764 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -854,6 +854,9 @@ struct task_struct { unsigned int xhlock_idx; /* For restoring at history boundaries */ unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; + unsigned int hist_id; + /* For overwrite check at each context exit */ + unsigned int hist_id_save[XHLOCK_CTX_NR]; #endif #ifdef CONFIG_UBSAN -- cgit From 28a903f63ec0811ead70ad0f8665e838d207a25e Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:54 +0900 Subject: locking/lockdep: Handle non(or multi)-acquisition of a crosslock No acquisition might be in progress on commit of a crosslock. Completion operations enabling crossrelease are the case like: CONTEXT X CONTEXT Y --------- --------- trigger completion context complete AX commit AX wait_for_complete AX acquire AX wait where AX is a crosslock. When no acquisition is in progress, we should not perform commit because the lock does not exist, which might cause incorrect memory access. So we have to track the number of acquisitions of a crosslock to handle it. Moreover, in case that more than one acquisition of a crosslock are overlapped like: CONTEXT W CONTEXT X CONTEXT Y CONTEXT Z --------- --------- --------- --------- acquire AX (gen_id: 1) acquire A acquire AX (gen_id: 10) acquire B commit AX acquire C commit AX where A, B and C are typical locks and AX is a crosslock. Current crossrelease code performs commits in Y and Z with gen_id = 10. However, we can use gen_id = 1 to do it, since not only 'acquire AX in X' but 'acquire AX in W' also depends on each acquisition in Y and Z until their commits. So make it use gen_id = 1 instead of 10 on their commits, which adds an additional dependency 'AX -> A' in the example above. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-8-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index c75eedd55af5..651cc61af041 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -324,6 +324,19 @@ struct hist_lock { * be called instead of lockdep_init_map(). */ struct cross_lock { + /* + * When more than one acquisition of crosslocks are overlapped, + * we have to perform commit for them based on cross_gen_id of + * the first acquisition, which allows us to add more true + * dependencies. + * + * Moreover, when no acquisition of a crosslock is in progress, + * we should not perform commit because the lock might not exist + * any more, which might cause incorrect memory access. So we + * have to track the number of acquisitions of a crosslock. + */ + int nr_acquire; + /* * Seperate hlock instance. This will be used at commit step. * @@ -547,9 +560,16 @@ extern void lockdep_init_map_crosslock(struct lockdep_map *lock, int subclass); extern void lock_commit_crosslock(struct lockdep_map *lock); +/* + * What we essencially have to initialize is 'nr_acquire'. Other members + * will be initialized in add_xlock(). + */ +#define STATIC_CROSS_LOCK_INIT() \ + { .nr_acquire = 0,} + #define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ { .map.name = (_name), .map.key = (void *)(_key), \ - .map.cross = 1, } + .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), } /* * To initialize a lockdep_map statically use this macro. -- cgit From cd8084f91c02c1afd256a39aa833bff737631304 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:56 +0900 Subject: locking/lockdep: Apply crossrelease to completions Although wait_for_completion() and its family can cause deadlock, the lock correctness validator could not be applied to them until now, because things like complete() are usually called in a different context from the waiting context, which violates lockdep's assumption. Thanks to CONFIG_LOCKDEP_CROSSRELEASE, we can now apply the lockdep detector to those completion operations. Applied it. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-10-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/completion.h b/include/linux/completion.h index 5d5aaae3af43..9bcebf509b4d 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,6 +9,9 @@ */ #include +#ifdef CONFIG_LOCKDEP_COMPLETE +#include +#endif /* * struct completion - structure used to maintain state for a "completion" @@ -25,10 +28,50 @@ struct completion { unsigned int done; wait_queue_head_t wait; +#ifdef CONFIG_LOCKDEP_COMPLETE + struct lockdep_map_cross map; +#endif }; +#ifdef CONFIG_LOCKDEP_COMPLETE +static inline void complete_acquire(struct completion *x) +{ + lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); +} + +static inline void complete_release(struct completion *x) +{ + lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); +} + +static inline void complete_release_commit(struct completion *x) +{ + lock_commit_crosslock((struct lockdep_map *)&x->map); +} + +#define init_completion(x) \ +do { \ + static struct lock_class_key __key; \ + lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ + "(complete)" #x, \ + &__key, 0); \ + __init_completion(x); \ +} while (0) +#else +#define init_completion(x) __init_completion(x) +static inline void complete_acquire(struct completion *x) {} +static inline void complete_release(struct completion *x) {} +static inline void complete_release_commit(struct completion *x) {} +#endif + +#ifdef CONFIG_LOCKDEP_COMPLETE +#define COMPLETION_INITIALIZER(work) \ + { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ + STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } +#else #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } +#endif #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) @@ -70,7 +113,7 @@ struct completion { * This inline function will initialize a dynamically created completion * structure. */ -static inline void init_completion(struct completion *x) +static inline void __init_completion(struct completion *x) { x->done = 0; init_waitqueue_head(&x->wait); -- cgit From 90001d67be2fa2acbe3510d1f64fa6533efa30ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 31 Jul 2017 17:50:05 +0200 Subject: sched/fair: Fix wake_affine() for !NUMA_BALANCING In commit: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") Rik changed wake_affine to consider NUMA information when balancing between LLC domains. There are a number of problems here which this patch tries to address: - LLC < NODE; in this case we'd use the wrong information to balance - !NUMA_BALANCING: in this case, the new code doesn't do any balancing at all - re-computes the NUMA data for every wakeup, this can mean iterating up to 64 CPUs for every wakeup. - default affine wakeups inside a cache We address these by saving the load/capacity values for each sched_domain during regular load-balance and using these values in wake_affine_llc(). The obvious down-side to using cached values is that they can be too old and poorly reflect reality. But this way we can use LLC wide information and thus not rely on assuming LLC matches NODE. We also don't rely on NUMA_BALANCING nor do we have to aggegate two nodes (or even cache domains) worth of CPUs for each wakeup. Signed-off-by: Peter Zijlstra (Intel) Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") [ Minor readability improvements. ] Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 7d065abc7a47..d7b6dab956ec 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -71,6 +71,14 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; + + /* + * Some variables from the most recent sd_lb_stats for this domain, + * used by wake_affine(). + */ + unsigned long nr_running; + unsigned long load; + unsigned long capacity; }; struct sched_domain { -- cgit From e743eb1ecd5564b5ae0a4a76c1566f748a358839 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Aug 2017 08:25:38 -0600 Subject: block: remove unused syncfull/asyncfull queue flags We haven't used these in years, but somehow the definitions still remained. Kill them, and renumber the QUEUE_FLAG_ space. We had a hole in the beginning of the space, too. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 60 ++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..f45f157b2910 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -601,38 +601,36 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ -#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ -#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ -#define QUEUE_FLAG_DYING 5 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ -#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */ -#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ +#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ +#define QUEUE_FLAG_DYING 2 /* queue being torn down */ +#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ +#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ +#define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 8 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ -#define QUEUE_FLAG_SECERASE 17 /* supports secure erase */ -#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ -#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ -#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ -#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 23 /* Write back caching */ -#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ -#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ -#define QUEUE_FLAG_DAX 26 /* device supports DAX */ -#define QUEUE_FLAG_STATS 27 /* track rq completion times */ -#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ -#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ -#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */ -#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */ +#define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ +#define QUEUE_FLAG_SECERASE 14 /* supports secure erase */ +#define QUEUE_FLAG_SAME_FORCE 15 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 16 /* queue tear-down finished */ +#define QUEUE_FLAG_INIT_DONE 17 /* queue is initialized */ +#define QUEUE_FLAG_NO_SG_MERGE 18 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 19 /* IO polling enabled if set */ +#define QUEUE_FLAG_WC 20 /* Write back caching */ +#define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ +#define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ +#define QUEUE_FLAG_DAX 23 /* device supports DAX */ +#define QUEUE_FLAG_STATS 24 /* track rq completion times */ +#define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ +#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ +#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ +#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit From 229a71860547ec856b156179a9c6bef2de426f66 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Aug 2017 11:38:21 +0900 Subject: irq: Make the irqentry text section unconditional Generate irqentry and softirqentry text sections without any Kconfig dependencies. This will add extra sections, but there should be no performace impact. Suggested-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Chris Zankel Cc: David S . Miller Cc: Francis Deslauriers Cc: Jesper Nilsson Cc: Linus Torvalds Cc: Max Filippov Cc: Mikael Starvik Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-cris-kernel@axis.com Cc: mathieu.desnoyers@efficios.com Link: http://lkml.kernel.org/r/150172789110.27216.3955739126693102122.stgit@devbox Signed-off-by: Ingo Molnar --- include/asm-generic/sections.h | 4 ++++ include/asm-generic/vmlinux.lds.h | 8 -------- include/linux/interrupt.h | 14 +------------- 3 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 532372c6cf15..e5da44eddd2f 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -27,6 +27,8 @@ * __kprobes_text_start, __kprobes_text_end * __entry_text_start, __entry_text_end * __ctors_start, __ctors_end + * __irqentry_text_start, __irqentry_text_end + * __softirqentry_text_start, __softirqentry_text_end */ extern char _text[], _stext[], _etext[]; extern char _data[], _sdata[], _edata[]; @@ -39,6 +41,8 @@ extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; +extern char __irqentry_text_start[], __irqentry_text_end[]; +extern char __softirqentry_text_start[], __softirqentry_text_end[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..62e2395f0a82 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -483,25 +483,17 @@ *(.entry.text) \ VMLINUX_SYMBOL(__entry_text_end) = .; -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define IRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__irqentry_text_start) = .; \ *(.irqentry.text) \ VMLINUX_SYMBOL(__irqentry_text_end) = .; -#else -#define IRQENTRY_TEXT -#endif -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) #define SOFTIRQENTRY_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__softirqentry_text_start) = .; \ *(.softirqentry.text) \ VMLINUX_SYMBOL(__softirqentry_text_end) = .; -#else -#define SOFTIRQENTRY_TEXT -#endif /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a2fddddb0d60..59ba11661b6e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -726,7 +727,6 @@ extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN) /* * We want to know which function is an entrypoint of a hardirq or a softirq. */ @@ -734,16 +734,4 @@ extern int arch_early_irq_init(void); #define __softirq_entry \ __attribute__((__section__(".softirqentry.text"))) -/* Limits of hardirq entrypoints */ -extern char __irqentry_text_start[]; -extern char __irqentry_text_end[]; -/* Limits of softirq entrypoints */ -extern char __softirqentry_text_start[]; -extern char __softirqentry_text_end[]; - -#else -#define __irq_entry -#define __softirq_entry -#endif - #endif -- cgit From fc53662f13b889a5a1c069e79ee1e3d4534df132 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 2 Aug 2017 18:09:14 +0200 Subject: x86/hyper-v: Make hv_do_hypercall() inline We have only three call sites for hv_do_hypercall() and we're going to change HVCALL_SIGNAL_EVENT to doing fast hypercall so we can inline this function for optimization. Hyper-V top level functional specification states that r9-r11 registers and flags may be clobbered by the hypervisor during hypercall and with inlining this is somewhat important, add the clobbers. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Andy Shevchenko Reviewed-by: Stephen Hemminger Cc: Andy Lutomirski Cc: Haiyang Zhang Cc: Jork Loeser Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Simon Xiao Cc: Steven Rostedt Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/20170802160921.21791-3-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- include/linux/hyperv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b7d7bbec74e0..6608a71e7d79 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1187,7 +1187,6 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, bool fb_overlap_ok); void vmbus_free_mmio(resource_size_t start, resource_size_t size); int vmbus_cpu_number_to_vp_number(int cpu_number); -u64 hv_do_hypercall(u64 control, void *input, void *output); /* * GUID definitions of various offer types - services offered to the guest. -- cgit From 057841713cfff62b4485cdd2b245f05b7ea3ba16 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 2 Aug 2017 18:09:16 +0200 Subject: hyper-v: Use fast hypercall for HVCALL_SIGNAL_EVENT We need to pass only 8 bytes of input for HvSignalEvent which makes it a perfect fit for fast hypercall. hv_input_signal_event_buffer is not needed any more and hv_input_signal_event is converted to union for convenience. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Andy Shevchenko Reviewed-by: Stephen Hemminger Cc: Andy Lutomirski Cc: Haiyang Zhang Cc: Jork Loeser Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Simon Xiao Cc: Steven Rostedt Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/20170802160921.21791-5-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- include/linux/hyperv.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6608a71e7d79..c472bd43bdd7 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -677,18 +677,6 @@ union hv_connection_id { } u; }; -/* Definition of the hv_signal_event hypercall input structure. */ -struct hv_input_signal_event { - union hv_connection_id connectionid; - u16 flag_number; - u16 rsvdz; -}; - -struct hv_input_signal_event_buffer { - u64 align8; - struct hv_input_signal_event event; -}; - enum hv_numa_policy { HV_BALANCED = 0, HV_LOCALIZED, @@ -770,8 +758,7 @@ struct vmbus_channel { } callback_mode; bool is_dedicated_interrupt; - struct hv_input_signal_event_buffer sig_buf; - struct hv_input_signal_event *sig_event; + u64 sig_event; /* * Starting with win8, this field will be used to specify -- cgit From 7415aea6072bab15969b6c3c5b2a193d88095326 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 2 Aug 2017 18:09:18 +0200 Subject: hyper-v: Globalize vp_index To support implementing remote TLB flushing on Hyper-V with a hypercall we need to make vp_index available outside of vmbus module. Rename and globalize. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Andy Shevchenko Reviewed-by: Stephen Hemminger Cc: Andy Lutomirski Cc: Haiyang Zhang Cc: Jork Loeser Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Simon Xiao Cc: Steven Rostedt Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/20170802160921.21791-7-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- include/linux/hyperv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c472bd43bdd7..e2a4fa57f110 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1173,7 +1173,6 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, resource_size_t size, resource_size_t align, bool fb_overlap_ok); void vmbus_free_mmio(resource_size_t start, resource_size_t size); -int vmbus_cpu_number_to_vp_number(int cpu_number); /* * GUID definitions of various offer types - services offered to the guest. -- cgit From a0ac441152238c9b474bafa46940511d9b2e9c7e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 8 Aug 2017 06:17:47 +0000 Subject: ASoC: soc-core: add snd_soc_rtdcom_xxx() Current snd_soc_pcm_runtime has platform / codec pointers, and we could use these specific pointer. But these will be replaced to more generic "component" soon, and will need more generic method to get each connected component pointer from rtd. This patch adds new snd_soc_rtdcom_xxx() to connect/disconnect component to rtd. It means same as previous "platform" / "codec" pointer style, but more generic. We can find necessary component pointer from rtd by using component driver name on snd_soc_rtdcom_lookup(). Here, the reason why it uses "driver name" is that "component name" was created by fmt_single_name() and difficult to use it from driver. Driver of course knows its "driver name", thus, using it is more easy. Signed-off-by: Kuninori Morimoto Tested-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- include/sound/soc.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index aad3f14966f1..87dbe39797ca 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -886,6 +886,18 @@ struct snd_soc_component { #endif }; +struct snd_soc_rtdcom_list { + struct snd_soc_component *component; + struct list_head list; /* rtd::component_list */ +}; +struct snd_soc_component* +snd_soc_rtdcom_lookup(struct snd_soc_pcm_runtime *rtd, + const char *driver_name); +#define for_each_rtdcom(rtd, rtdcom) \ + list_for_each_entry(rtdcom, &(rtd)->component_list, list) +#define for_each_rtdcom_safe(rtd, rtdcom1, rtdcom2) \ + list_for_each_entry_safe(rtdcom1, rtdcom2, &(rtd)->component_list, list) + /* SoC Audio Codec device */ struct snd_soc_codec { struct device *dev; @@ -1240,6 +1252,7 @@ struct snd_soc_pcm_runtime { unsigned int num; /* 0-based and monotonic increasing */ struct list_head list; /* rtd list of the soc card */ + struct list_head component_list; /* list of connected components */ /* bit field */ unsigned int dev_registered:1; -- cgit From 90be711e23f7c7ee7b3d6a6e5aa7ee9bab321f2e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 8 Aug 2017 06:18:10 +0000 Subject: ASoC: use snd_soc_rtdcom_add() and convert to consistent operation Basically, current ALSA SoC framework is based on CPU/Codec/Platform, but its operation doesn't have consistent. Thus, source code was unreadable, and difficult to understand. This patch connects each component (= CPU/Codec/Platform) to rtd by using snd_soc_rtdcom_add(), and convert uneven operations to consistent operation. Signed-off-by: Kuninori Morimoto Tested-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 87dbe39797ca..82e078151787 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -1237,7 +1237,7 @@ struct snd_soc_pcm_runtime { struct snd_pcm *pcm; struct snd_compr *compr; struct snd_soc_codec *codec; - struct snd_soc_platform *platform; + struct snd_soc_platform *platform; /* will be removed */ struct snd_soc_dai *codec_dai; struct snd_soc_dai *cpu_dai; -- cgit From fc66ce0b72046318c4c4a66e67a2362193df6de1 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Thu, 10 Aug 2017 09:02:37 -0700 Subject: ARM: OMAP2+: Add pdata-quirks for MMC/SD on DRA74x EVM DRA74x EVM Rev H EVM comes with revision 2.0 silicon. However, earlier versions of EVM can come with either revision 1.1 or revision 1.0 of silicon. The device-tree file is written to support rev 2.0 of silicon. pdata quirks are used to then override the settings needed for PG 1.1 silicon. PG 1.1 silicon has limitations w.r.t frequencies at which MMC1/2/3 can operate as well as different IOdelay numbers. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Sekhar Nori Signed-off-by: Tony Lindgren --- include/linux/platform_data/hsmmc-omap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index 8e981be2e2c2..67bded22eae4 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -70,6 +70,9 @@ struct omap_hsmmc_platform_data { #define HSMMC_HAS_HSPE_SUPPORT (1 << 2) unsigned features; + /* string specifying a particular variant of hardware */ + char *version; + int gpio_cd; /* gpio (card detect) */ int gpio_cod; /* gpio (cover detect) */ int gpio_wp; /* gpio (write protect) */ -- cgit From eba6130b31bc0c112302d0b1340badb37fd3be1e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Jun 2017 17:24:29 +0530 Subject: ARM: dts: Add dra7 iodelay configuration Add dra7 iodelay configuration. Signed-off-by: Tony Lindgren Signed-off-by: Nishanth Menon Signed-off-by: Kishon Vijay Abraham I --- include/dt-bindings/pinctrl/dra.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/dra.h b/include/dt-bindings/pinctrl/dra.h index 5c75e80915fc..18ec5df5a581 100644 --- a/include/dt-bindings/pinctrl/dra.h +++ b/include/dt-bindings/pinctrl/dra.h @@ -73,5 +73,8 @@ */ #define DRA7XX_CORE_IOPAD(pa, val) (((pa) & 0xffff) - 0x3400) (val) +/* DRA7 IODELAY configuration parameters */ +#define A_DELAY_PS(val) ((val) & 0xffff) +#define G_DELAY_PS(val) ((val) & 0xffff) #endif -- cgit From bbfb6ce86c9889a5d434e2e603d41e0ce5b552e2 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 1 Aug 2017 09:58:12 -0700 Subject: drm/i915: Implement .get_format_info() hook for CCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involved telling the display engine the location of the color control surfae (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. By providing our own format information for the CCS formats, we should be able to make framebuffer_check() do the right thing for the CCS surface as well. Note that we'll return the same format info for both Y and Yf tiled format as that's what happens with the non-CCS Y vs. Yf as well. If desired, we could potentially return a unique pointer for each pixel_format+tiling+ccs combination, in which case we immediately be able to tell if any of that stuff changed by just comparing the pointers. But that does sound a bit wasteful space wise. v2: Drop the 'dev' argument from the hook v3: Include the description of the CCS surface layout v4: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason) v5: Re-drop 'dev', fix commit message, add missing drm_fourcc.h description of CCS layout. (daniels) Cc: Daniel Vetter Cc: Ben Widawsky Cc: Jason Ekstrand Acked-by: Jason Ekstrand Reviewed-by: Ben Widawsky (v3) Reviewed-by: Daniel Stone Signed-off-by: Ville Syrjä Signed-off-by: Ben Widawsky Signed-off-by: Daniel Stone --- include/uapi/drm/drm_fourcc.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 76c9101a7fc6..3ad838d3f93f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -263,6 +263,26 @@ extern "C" { */ #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) +/* + * Intel color control surface (CCS) for render compression + * + * The framebuffer format must be one of the 8:8:8:8 RGB formats. + * The main surface will be plane index 0 and must be Y/Yf-tiled, + * the CCS will be plane index 1. + * + * Each CCS tile matches a 1024x512 pixel area of the main surface. + * To match certain aspects of the 3D hardware the CCS is + * considered to be made up of normal 128Bx32 Y tiles, Thus + * the CCS pitch must be specified in multiples of 128 bytes. + * + * In reality the CCS tile appears to be a 64Bx64 Y tile, composed + * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks. + * But that fact is not relevant unless the memory is accessed + * directly. + */ +#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4) +#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit From 606799cc5049ae4ccb51ba3242365ca2d411da13 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 10 Aug 2017 12:49:57 -0500 Subject: PCI: Inline and remove pcibios_update_irq() pcibios_update_irq() was a weak function with only one trivial implementation. Inline it and remove the weak function. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 69034ab8a68e..a9e8d2f3bd91 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -844,7 +844,6 @@ char *pcibios_setup(char *str); resource_size_t pcibios_align_resource(void *, const struct resource *, resource_size_t, resource_size_t); -void pcibios_update_irq(struct pci_dev *, int irq); /* Weak but can be overriden by arch */ void pci_fixup_cardbus(struct pci_bus *); -- cgit From 690cbb90a709c1b9389c6cb8e1978e77553ce0fb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2017 00:13:07 +0200 Subject: PM / s2idle: Rename PM_SUSPEND_FREEZE to PM_SUSPEND_TO_IDLE To make it clear that the symbol in question refers to suspend-to-idle, rename it from PM_SUSPEND_FREEZE to PM_SUSPEND_TO_IDLE. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8c3b0b1e6786..d43654be0097 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -33,10 +33,10 @@ static inline void pm_restore_console(void) typedef int __bitwise suspend_state_t; #define PM_SUSPEND_ON ((__force suspend_state_t) 0) -#define PM_SUSPEND_FREEZE ((__force suspend_state_t) 1) +#define PM_SUSPEND_TO_IDLE ((__force suspend_state_t) 1) #define PM_SUSPEND_STANDBY ((__force suspend_state_t) 2) #define PM_SUSPEND_MEM ((__force suspend_state_t) 3) -#define PM_SUSPEND_MIN PM_SUSPEND_FREEZE +#define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) enum suspend_stat_step { -- cgit From f02f4f9d826590f1ef1b087374d3e3cfb7949eac Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2017 00:13:56 +0200 Subject: PM / s2idle: Rename freeze_state enum and related items Rename the freeze_state enum representing the suspend-to-idle state machine states to s2idle_states and rename the related variables and functions accordingly. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d43654be0097..feb43dfbf7bc 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -237,22 +237,22 @@ static inline bool pm_resume_via_firmware(void) } /* Suspend-to-idle state machnine. */ -enum freeze_state { - FREEZE_STATE_NONE, /* Not suspended/suspending. */ - FREEZE_STATE_ENTER, /* Enter suspend-to-idle. */ - FREEZE_STATE_WAKE, /* Wake up from suspend-to-idle. */ +enum s2idle_states { + S2IDLE_STATE_NONE, /* Not suspended/suspending. */ + S2IDLE_STATE_ENTER, /* Enter suspend-to-idle. */ + S2IDLE_STATE_WAKE, /* Wake up from suspend-to-idle. */ }; -extern enum freeze_state __read_mostly suspend_freeze_state; +extern enum s2idle_states __read_mostly s2idle_state; -static inline bool idle_should_freeze(void) +static inline bool idle_should_enter_s2idle(void) { - return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER); + return unlikely(s2idle_state == S2IDLE_STATE_ENTER); } extern void __init pm_states_init(void); extern void freeze_set_ops(const struct platform_freeze_ops *ops); -extern void freeze_wake(void); +extern void s2idle_wake(void); /** * arch_suspend_disable_irqs - disable IRQs for suspend @@ -284,10 +284,10 @@ static inline bool pm_resume_via_firmware(void) { return false; } static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } -static inline bool idle_should_freeze(void) { return false; } +static inline bool idle_should_enter_s2idle(void) { return false; } static inline void __init pm_states_init(void) {} static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} -static inline void freeze_wake(void) {} +static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ /* struct pbe is used for creating lists of pages that should be restored -- cgit From 28ba086ed30fb3fb714598aa029b894c3754fa7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2017 00:14:45 +0200 Subject: PM / s2idle: Rename ->enter_freeze to ->enter_s2idle Rename the ->enter_freeze cpuidle driver callback to ->enter_s2idle to make it clear that it is used for entering suspend-to-idle and rename the related functions, variables and so on accordingly. Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index fc1e5d7fc1c7..f0e2e9223e66 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -52,11 +52,11 @@ struct cpuidle_state { int (*enter_dead) (struct cpuidle_device *dev, int index); /* - * CPUs execute ->enter_freeze with the local tick or entire timekeeping + * CPUs execute ->enter_s2idle with the local tick or entire timekeeping * suspended, so it must not re-enable interrupts at any point (even * temporarily) or attempt to change states of clock event devices. */ - void (*enter_freeze) (struct cpuidle_device *dev, + void (*enter_s2idle) (struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); }; @@ -197,14 +197,14 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #ifdef CONFIG_CPU_IDLE extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev); -extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, +extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern void cpuidle_use_deepest_state(bool enable); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } -static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, +static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_use_deepest_state(bool enable) -- cgit From 23d5855f4774f4f7c246a67057ecacc904696d8a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2017 00:15:30 +0200 Subject: PM / s2idle: Rename platform operations structure Rename struct platform_freeze_ops to platform_s2idle_ops to make it clear that the callbacks in it are used during suspend-to-idle suspend/resume transitions and rename the related functions, variables and so on accordingly. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index feb43dfbf7bc..0d41daf7e89d 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -186,7 +186,7 @@ struct platform_suspend_ops { void (*recover)(void); }; -struct platform_freeze_ops { +struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); void (*wake)(void); @@ -251,7 +251,7 @@ static inline bool idle_should_enter_s2idle(void) } extern void __init pm_states_init(void); -extern void freeze_set_ops(const struct platform_freeze_ops *ops); +extern void s2idle_set_ops(const struct platform_s2idle_ops *ops); extern void s2idle_wake(void); /** @@ -286,7 +286,7 @@ static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool idle_should_enter_s2idle(void) { return false; } static inline void __init pm_states_init(void) {} -static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} +static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {} static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ -- cgit From 38e44da591303d08b0d965a033e11ade284999d0 Mon Sep 17 00:00:00 2001 From: Brian Bian Date: Wed, 9 Aug 2017 11:45:40 -0700 Subject: thermal: int3400_thermal: process "thermal table changed" event Some BIOS implement ACPI notification code 0x83 to indicate active relationship table(ART) and/or thermal relationship table(TRT) changes to INT3400 device. This event needs to be propagated to user space so that it can be handled by the user space thermal daemon. Signed-off-by: Brian Bian Signed-off-by: Zhang Rui --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index dab11f97e1c6..fd5b959c753c 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -102,6 +102,7 @@ enum thermal_notify_event { THERMAL_DEVICE_DOWN, /* Thermal device is down */ THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ + THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */ }; struct thermal_zone_device_ops { -- cgit From 077fbac405bfc6d41419ad6c1725804ad4e9887c Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 11 Aug 2017 02:11:33 +0900 Subject: net: xfrm: support setting an output mark. On systems that use mark-based routing it may be necessary for routing lookups to use marks in order for packets to be routed correctly. An example of such a system is Android, which uses socket marks to route packets via different networks. Currently, routing lookups in tunnel mode always use a mark of zero, making routing incorrect on such systems. This patch adds a new output_mark element to the xfrm state and a corresponding XFRMA_OUTPUT_MARK netlink attribute. The output mark differs from the existing xfrm mark in two ways: 1. The xfrm mark is used to match xfrm policies and states, while the xfrm output mark is used to set the mark (and influence the routing) of the packets emitted by those states. 2. The existing mark is constrained to be a subset of the bits of the originating socket or transformed packet, but the output mark is arbitrary and depends only on the state. The use of a separate mark provides additional flexibility. For example: - A packet subject to two transforms (e.g., transport mode inside tunnel mode) can have two different output marks applied to it, one for the transport mode SA and one for the tunnel mode SA. - On a system where socket marks determine routing, the packets emitted by an IPsec tunnel can be routed based on a mark that is determined by the tunnel, not by the marks of the unencrypted packets. - Support for setting the output marks can be introduced without breaking any existing setups that employ both mark-based routing and xfrm tunnel mode. Simply changing the code to use the xfrm mark for routing output packets could xfrm mark could change behaviour in a way that breaks these setups. If the output mark is unspecified or set to zero, the mark is not set or changed. Tested: make allyesconfig; make -j64 Tested: https://android-review.googlesource.com/452776 Signed-off-by: Lorenzo Colitti Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 9 ++++++--- include/uapi/linux/xfrm.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 18d7de34a5c3..9c7b70cce6d6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -165,6 +165,7 @@ struct xfrm_state { int header_len; int trailer_len; u32 extra_flags; + u32 output_mark; } props; struct xfrm_lifetime_cfg lft; @@ -298,10 +299,12 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr); + const xfrm_address_t *daddr, + u32 mark); int (*get_saddr)(struct net *net, int oif, xfrm_address_t *saddr, - xfrm_address_t *daddr); + xfrm_address_t *daddr, + u32 mark); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, int reverse); @@ -1640,7 +1643,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, - int family); + int family, u32 mark); struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 2b384ff09fa0..5fe7370a2bef 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -304,6 +304,7 @@ enum xfrm_attr_type_t { XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ + XFRMA_OUTPUT_MARK, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) -- cgit From 3e48930cc74f0c212ee1838f89ad0ca7fcf2fea1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Aug 2017 05:49:01 -0700 Subject: cgroup: misc changes Misc trivial changes to prepare for future changes. No functional difference. * Expose cgroup_get(), cgroup_tryget() and cgroup_parent(). * Implement task_dfl_cgroup() which dereferences css_set->dfl_cgrp. * Rename cgroup_stats_show() to cgroup_stat_show() for consistency with the file name. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 79faa6467f76..085056e562b1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -398,6 +398,16 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } +static inline void cgroup_get(struct cgroup *cgrp) +{ + css_get(&cgrp->self); +} + +static inline bool cgroup_tryget(struct cgroup *cgrp) +{ + return css_tryget(&cgrp->self); +} + static inline void cgroup_put(struct cgroup *cgrp) { css_put(&cgrp->self); @@ -510,6 +520,20 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } +static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) +{ + return task_css_set(task)->dfl_cgrp; +} + +static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) +{ + struct cgroup_subsys_state *parent_css = cgrp->self.parent; + + if (parent_css) + return container_of(parent_css, struct cgroup, self); + return NULL; +} + /** * cgroup_is_descendant - test ancestry * @cgrp: the cgroup to be tested -- cgit From afa6c45429f6e5ddd1eb6b77a36358f9c4b789da Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:45 +0800 Subject: sctp: remove the unused typedef sctp_packet_phandler_t Remove this function typedef, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fbe6e81b889b..73e9509c057e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -657,8 +657,6 @@ struct sctp_sockaddr_entry { #define SCTP_ADDRESS_TICK_DELAY 500 -typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *); - /* This structure holds lists of chunks as we are assembling for * transmission. */ -- cgit From edf903f83ebca988e04a39f515ab6eacb92055df Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:46 +0800 Subject: sctp: remove the typedef sctp_sender_hb_info_t This patch is to remove the typedef sctp_sender_hb_info_t, and replace with struct sctp_sender_hb_info in the places where it's using this typedef. It is also to use sizeof(variable) instead of sizeof(type). Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 73e9509c057e..60033a263193 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -371,12 +371,12 @@ union sctp_params { * chunk is sent and the destination transport address to which this * HEARTBEAT is sent (see Section 8.3). */ -typedef struct sctp_sender_hb_info { +struct sctp_sender_hb_info { struct sctp_paramhdr param_hdr; union sctp_addr daddr; unsigned long sent_at; __u64 hb_nonce; -} sctp_sender_hb_info_t; +}; int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); -- cgit From 74439f344b1becd57ec822bc0e2c1a4cbf240a53 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:47 +0800 Subject: sctp: remove the typedef sctp_endpoint_type_t This patch is to remove the typedef sctp_endpoint_type_t, and replace with enum sctp_endpoint_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 60033a263193..937187f3bffc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1142,10 +1142,10 @@ int sctp_is_ep_boundall(struct sock *sk); /* What type of endpoint? */ -typedef enum { +enum sctp_endpoint_type { SCTP_EP_TYPE_SOCKET, SCTP_EP_TYPE_ASSOCIATION, -} sctp_endpoint_type_t; +}; /* * A common base class to bridge the implmentation view of a @@ -1169,7 +1169,7 @@ struct sctp_ep_common { int hashent; /* Runtime type information. What kind of endpoint is this? */ - sctp_endpoint_type_t type; + enum sctp_endpoint_type type; /* Some fields to help us manage this object. * refcnt - Reference count access to this object. -- cgit From a05437ac5deb100f94e290ad4c5eef3e78f4b6bb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:48 +0800 Subject: sctp: remove the typedef sctp_cmsgs_t This patch is to remove the typedef sctp_cmsgs_t, and replace with struct sctp_cmsgs in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 937187f3bffc..e171d3a3d2b4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1985,11 +1985,11 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1, struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc); /* A convenience structure to parse out SCTP specific CMSGs. */ -typedef struct sctp_cmsgs { +struct sctp_cmsgs { struct sctp_initmsg *init; struct sctp_sndrcvinfo *srinfo; struct sctp_sndinfo *sinfo; -} sctp_cmsgs_t; +}; /* Structure for tracking memory objects */ typedef struct { -- cgit From d38ef5ae35b0960f3219f1cf0203e19819e757c7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:49 +0800 Subject: sctp: remove the typedef sctp_dbg_objcnt_entry_t This patch is to remove the typedef sctp_dbg_objcnt_entry_t, and replace with struct sctp_dbg_objcnt_entry in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e171d3a3d2b4..b6d75b37f84d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1992,9 +1992,9 @@ struct sctp_cmsgs { }; /* Structure for tracking memory objects */ -typedef struct { +struct sctp_dbg_objcnt_entry { char *label; atomic_t *counter; -} sctp_dbg_objcnt_entry_t; +}; #endif /* __sctp_structs_h__ */ -- cgit From b7ef2618a0bf75c1e480b05739b0c5f2a42081cd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:50 +0800 Subject: sctp: remove the typedef sctp_socket_type_t This patch is to remove the typedef sctp_socket_type_t, and replace with enum sctp_socket_type in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- include/net/sctp/structs.h | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 24ff7931d38c..06b4f515e157 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -550,7 +550,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) -static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) +static inline int __sctp_style(const struct sock *sk, + enum sctp_socket_type style) { return sctp_sk(sk)->type == style; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index b6d75b37f84d..0477945de1a3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -150,18 +150,18 @@ extern struct sctp_globals { #define sctp_checksum_disable (sctp_globals.checksum_disable) /* SCTP Socket type: UDP or TCP style. */ -typedef enum { +enum sctp_socket_type { SCTP_SOCKET_UDP = 0, SCTP_SOCKET_UDP_HIGH_BANDWIDTH, SCTP_SOCKET_TCP -} sctp_socket_type_t; +}; /* Per socket SCTP information. */ struct sctp_sock { /* inet_sock has to be the first member of sctp_sock */ struct inet_sock inet; /* What kind of a socket is this? */ - sctp_socket_type_t type; + enum sctp_socket_type type; /* PF_ family specific functions. */ struct sctp_pf *pf; -- cgit From e2c3108ab25b4dbab3821e8b6084bfb73afb655c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:51 +0800 Subject: sctp: remove the typedef sctp_cmd_t This patch is to remove the typedef sctp_cmd_t, and replace with enum sctp_cmd in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 376cb78b6247..4e9e589b8f18 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -196,15 +196,15 @@ static inline sctp_arg_t SCTP_NULL(void) return retval; } -typedef struct { +struct sctp_cmd { sctp_arg_t obj; sctp_verb_t verb; -} sctp_cmd_t; +}; typedef struct { - sctp_cmd_t cmds[SCTP_MAX_NUM_COMMANDS]; - sctp_cmd_t *last_used_slot; - sctp_cmd_t *next_cmd; + struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; + struct sctp_cmd *last_used_slot; + struct sctp_cmd *next_cmd; } sctp_cmd_seq_t; @@ -228,7 +228,7 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, sctp_arg_t obj) { - sctp_cmd_t *cmd = seq->last_used_slot - 1; + struct sctp_cmd *cmd = seq->last_used_slot - 1; BUG_ON(cmd < seq->cmds); @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; -- cgit From a85bbeb221d860097859f110ba1321f2b0653f07 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:52 +0800 Subject: sctp: remove the typedef sctp_cmd_seq_t This patch is to remove the typedef sctp_cmd_seq_t, and replace with struct sctp_cmd_seq in the places where it's using this typedef. Note that it doesn't fix many indents although it should, as sctp_disposition_t's removal would mess them up again. So better to fix them when removing sctp_disposition_t in the later patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 14 +++++++------- include/net/sctp/sm.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 4e9e589b8f18..cbf6798866ef 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -110,7 +110,7 @@ typedef enum { SCTP_CMD_LAST } sctp_verb_t; -/* How many commands can you put in an sctp_cmd_seq_t? +/* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful * analysis of the state functions, but in reality just taken from * thin air in the hopes othat we don't trigger a kernel panic. @@ -201,17 +201,17 @@ struct sctp_cmd { sctp_verb_t verb; }; -typedef struct { +struct sctp_cmd_seq { struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS]; struct sctp_cmd *last_used_slot; struct sctp_cmd *next_cmd; -} sctp_cmd_seq_t; +}; /* Initialize a block of memory as a command sequence. * Return 0 if the initialization fails. */ -static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) +static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) { /* cmds[] is filled backwards to simplify the overflow BUG() check */ seq->last_used_slot = seq->cmds + SCTP_MAX_NUM_COMMANDS; @@ -220,12 +220,12 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq) } -/* Add a command to an sctp_cmd_seq_t. +/* Add a command to an struct sctp_cmd_seq. * * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, sctp_arg_t obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; @@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb, /* Return the next command structure in an sctp_cmd_seq. * Return NULL at the end of the sequence. */ -static inline struct sctp_cmd *sctp_next_cmd(sctp_cmd_seq_t *seq) +static inline struct sctp_cmd *sctp_next_cmd(struct sctp_cmd_seq *seq) { if (seq->next_cmd <= seq->last_used_slot) return NULL; diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 1e7651c3b158..9af64b98d96b 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -75,7 +75,7 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_association *, const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *); + struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); typedef struct { sctp_state_fn_t *fn; -- cgit From c488b7704ed0eed18e11f9b685931558735f2a68 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:53 +0800 Subject: sctp: remove the typedef sctp_arg_t This patch is to remove the typedef sctp_arg_t, and replace with union sctp_arg in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index cbf6798866ef..f5fc425b5a4f 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -117,7 +117,7 @@ typedef enum { */ #define SCTP_MAX_NUM_COMMANDS 20 -typedef union { +union sctp_arg { void *zero_all; /* Set to NULL to clear the entire union */ __s32 i32; __u32 u32; @@ -137,24 +137,24 @@ typedef union { struct sctp_packet *packet; struct sctp_sackhdr *sackh; struct sctp_datamsg *msg; -} sctp_arg_t; +}; /* We are simulating ML type constructors here. * * SCTP_ARG_CONSTRUCTOR(NAME, TYPE, ELT) builds a function called * SCTP_NAME() which takes an argument of type TYPE and returns an - * sctp_arg_t. It does this by inserting the sole argument into the - * ELT union element of a local sctp_arg_t. + * union sctp_arg. It does this by inserting the sole argument into + * the ELT union element of a local union sctp_arg. * * E.g., SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) builds SCTP_I32(arg), - * which takes an __s32 and returns a sctp_arg_t containing the + * which takes an __s32 and returns a union sctp_arg containing the * __s32. So, after foo = SCTP_I32(arg), foo.i32 == arg. */ #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \ -static inline sctp_arg_t \ +static inline union sctp_arg \ SCTP_## name (type arg) \ -{ sctp_arg_t retval;\ +{ union sctp_arg retval;\ retval.zero_all = NULL;\ retval.elt = arg;\ return retval;\ @@ -179,25 +179,25 @@ SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) -static inline sctp_arg_t SCTP_FORCE(void) +static inline union sctp_arg SCTP_FORCE(void) { return SCTP_I32(1); } -static inline sctp_arg_t SCTP_NOFORCE(void) +static inline union sctp_arg SCTP_NOFORCE(void) { return SCTP_I32(0); } -static inline sctp_arg_t SCTP_NULL(void) +static inline union sctp_arg SCTP_NULL(void) { - sctp_arg_t retval; + union sctp_arg retval; retval.zero_all = NULL; return retval; } struct sctp_cmd { - sctp_arg_t obj; + union sctp_arg obj; sctp_verb_t verb; }; @@ -226,7 +226,7 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * to wrap data which goes in the obj argument. */ static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - sctp_arg_t obj) + union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit From e08af95df1130883762b388a19bb150ae5d16c09 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:54 +0800 Subject: sctp: remove the typedef sctp_verb_t This patch is to remove the typedef sctp_verb_t, and replace with enum sctp_verb in the places where it's using this typedef. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/command.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index f5fc425b5a4f..b55c6a48a206 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -40,7 +40,7 @@ #include -typedef enum { +enum sctp_verb { SCTP_CMD_NOP = 0, /* Do nothing. */ SCTP_CMD_NEW_ASOC, /* Register a new association. */ SCTP_CMD_DELETE_TCB, /* Delete the current association. */ @@ -108,7 +108,7 @@ typedef enum { SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_SET_ASOC, /* Restore association context */ SCTP_CMD_LAST -} sctp_verb_t; +}; /* How many commands can you put in an struct sctp_cmd_seq? * This is a rather arbitrary number, ideally derived from a careful @@ -198,7 +198,7 @@ static inline union sctp_arg SCTP_NULL(void) struct sctp_cmd { union sctp_arg obj; - sctp_verb_t verb; + enum sctp_verb verb; }; struct sctp_cmd_seq { @@ -225,8 +225,8 @@ static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq) * Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above * to wrap data which goes in the obj argument. */ -static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, sctp_verb_t verb, - union sctp_arg obj) +static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq, + enum sctp_verb verb, union sctp_arg obj) { struct sctp_cmd *cmd = seq->last_used_slot - 1; -- cgit From eb662a6a9b2a4ee3c76bbfc4c23f4dc56859b0f3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:55 +0800 Subject: sctp: remove the unused typedef sctp_sm_command_t Remove this typedef including the struct, there is even no places using it. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 9af64b98d96b..3ca75a6c4b5e 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -65,11 +65,6 @@ typedef enum { SCTP_DISPOSITION_BUG, /* This is a bug. */ } sctp_disposition_t; -typedef struct { - int name; - int action; -} sctp_sm_command_t; - typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, const struct sctp_endpoint *, const struct sctp_association *, -- cgit From 8ee821aea39c6bf4142c9319adecea6d3e1af4a2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:56 +0800 Subject: sctp: remove the typedef sctp_sm_table_entry_t This patch is to remove the typedef sctp_sm_table_entry_t, and replace with struct sctp_sm_table_entry in the places where it's using this typedef. It is also to fix some indents. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 3ca75a6c4b5e..7ad240228a0f 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -72,10 +72,10 @@ typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, void *arg, struct sctp_cmd_seq *); typedef void (sctp_timer_event_t) (unsigned long); -typedef struct { +struct sctp_sm_table_entry { sctp_state_fn_t *fn; const char *name; -} sctp_sm_table_entry_t; +}; /* A naming convention of "sctp_sf_xxx" applies to all the state functions * currently in use. @@ -170,7 +170,7 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire; /* Prototypes for utility support functions. */ __u8 sctp_get_chunk_type(struct sctp_chunk *chunk); -const sctp_sm_table_entry_t *sctp_sm_lookup_event( +const struct sctp_sm_table_entry *sctp_sm_lookup_event( struct net *net, enum sctp_event event_type, enum sctp_state state, -- cgit From 172a1599ba88df7147f6503a75686fb89c8a1f3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:57 +0800 Subject: sctp: remove the typedef sctp_disposition_t This patch is to remove the typedef sctp_disposition_t, and replace with enum sctp_disposition in the places where it's using this typedef. It's also to fix the indent for many functions' defination. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 7ad240228a0f..33077f317995 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -53,7 +53,7 @@ /* * Possible values for the disposition are: */ -typedef enum { +enum sctp_disposition { SCTP_DISPOSITION_DISCARD, /* No further processing. */ SCTP_DISPOSITION_CONSUME, /* Process return values normally. */ SCTP_DISPOSITION_NOMEM, /* We ran out of memory--recover. */ @@ -63,14 +63,15 @@ typedef enum { SCTP_DISPOSITION_NOT_IMPL, /* This entry is not implemented. */ SCTP_DISPOSITION_ERROR, /* This is plain old user error. */ SCTP_DISPOSITION_BUG, /* This is a bug. */ -} sctp_disposition_t; - -typedef sctp_disposition_t (sctp_state_fn_t) (struct net *, - const struct sctp_endpoint *, - const struct sctp_association *, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *); +}; + +typedef enum sctp_disposition (sctp_state_fn_t) ( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); typedef void (sctp_timer_event_t) (unsigned long); struct sctp_sm_table_entry { sctp_state_fn_t *fn; -- cgit From 327c0dab8d1301cd866816de8c7f32eb872cabac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 11 Aug 2017 10:23:58 +0800 Subject: sctp: fix some indents in sm_make_chunk.c There are some bad indents of functions' defination in sm_make_chunk.c. They have been there since beginning, it was probably caused by that the typedef sctp_chunk_t was replaced with struct sctp_chunk. So it's the best time to fix them in this patchset, it's also to fix some bad indents in other functions' defination in sm_make_chunk.c. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 158 +++++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 33077f317995..2db3d3a9ce1d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -184,68 +184,69 @@ __u32 sctp_generate_verification_tag(void); void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag); /* Prototypes for chunk-building functions. */ -struct sctp_chunk *sctp_make_init(const struct sctp_association *, - const struct sctp_bind_addr *, - gfp_t gfp, int vparam_len); -struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *, - const struct sctp_chunk *, - const gfp_t gfp, - const int unkparam_len); -struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_cwr(const struct sctp_association *, +struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, + const struct sctp_bind_addr *bp, + gfp_t gfp, int vparam_len); +struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const gfp_t gfp, const int unkparam_len); +struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, const __u32 lowest_tsn, - const struct sctp_chunk *); -struct sctp_chunk * sctp_make_datafrag_empty(struct sctp_association *, - const struct sctp_sndrcvinfo *sinfo, - int len, const __u8 flags, - __u16 ssn, gfp_t gfp); -struct sctp_chunk *sctp_make_ecne(const struct sctp_association *, - const __u32); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, + const struct sctp_sndrcvinfo *sinfo, + int len, const __u8 flags, + __u16 ssn, gfp_t gfp); +struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, + const __u32 lowest_tsn); +struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_shutdown_complete(const struct sctp_association *, - const struct sctp_chunk *); -void sctp_init_cause(struct sctp_chunk *, __be16 cause, size_t); -struct sctp_chunk *sctp_make_abort(const struct sctp_association *, - const struct sctp_chunk *, - const size_t hint); -struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *, - const struct sctp_chunk *, - __u32 tsn); -struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *, - struct msghdr *, size_t msg_len); -struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *, - const struct sctp_chunk *, - const __u8 *, - const size_t ); -struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *, - const struct sctp_chunk *, - struct sctp_paramhdr *); -struct sctp_chunk *sctp_make_violation_max_retrans(const struct sctp_association *, - const struct sctp_chunk *); -struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *, - const struct sctp_transport *); -struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *, - const struct sctp_chunk *, - const void *payload, - const size_t paylen); -struct sctp_chunk *sctp_make_op_error(const struct sctp_association *, - const struct sctp_chunk *chunk, - __be16 cause_code, - const void *payload, - size_t paylen, - size_t reserve_tail); - -struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, - union sctp_addr *, - struct sockaddr *, - int, __be16); + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_shutdown_complete( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); +struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const size_t hint); +struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __u32 tsn); +struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, + struct msghdr *msg, size_t msg_len); +struct sctp_chunk *sctp_make_abort_violation( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const __u8 *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param); +struct sctp_chunk *sctp_make_violation_max_retrans( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); +struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, + const struct sctp_transport *transport); +struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const void *payload, + const size_t paylen); +struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __be16 cause_code, const void *payload, + size_t paylen, size_t reserve_tail); + +struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, + union sctp_addr *laddr, + struct sockaddr *addrs, + int addrcnt, __be16 flags); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); bool sctp_verify_asconf(const struct sctp_association *asoc, @@ -259,27 +260,25 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, __u32 new_cum_tsn, size_t nstreams, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); -struct sctp_chunk *sctp_make_strreset_req( - const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, - bool out, bool in); +struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, + __u16 stream_num, __u16 *stream_list, + bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( - const struct sctp_association *asoc); + const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_addstrm( - const struct sctp_association *asoc, - __u16 out, __u16 in); -struct sctp_chunk *sctp_make_strreset_resp( - const struct sctp_association *asoc, - __u32 result, __u32 sn); -struct sctp_chunk *sctp_make_strreset_tsnresp( - struct sctp_association *asoc, - __u32 result, __u32 sn, - __u32 sender_tsn, __u32 receiver_tsn); + const struct sctp_association *asoc, + __u16 out, __u16 in); +struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, + __u32 result, __u32 sn); +struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, + __u32 receiver_tsn); bool sctp_verify_reconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_paramhdr **errp); -void sctp_chunk_assign_tsn(struct sctp_chunk *); -void sctp_chunk_assign_ssn(struct sctp_chunk *); +void sctp_chunk_assign_tsn(struct sctp_chunk *chunk); +void sctp_chunk_assign_ssn(struct sctp_chunk *chunk); /* Prototypes for stream-processing functions. */ struct sctp_chunk *sctp_process_strreset_outreq( @@ -322,11 +321,12 @@ void sctp_generate_proto_unreach_event(unsigned long peer); void sctp_ootb_pkt_free(struct sctp_packet *packet); -struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, - gfp_t gfp, int *err, - struct sctp_chunk **err_chk_p); +struct sctp_association *sctp_unpack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + gfp_t gfp, int *err, + struct sctp_chunk **err_chk_p); /* 3rd level prototypes */ __u32 sctp_generate_tag(const struct sctp_endpoint *ep); -- cgit From 591b6bb605785c12a21e8b07a08a277065b655a5 Mon Sep 17 00:00:00 2001 From: Andrey Korolyov Date: Thu, 10 Aug 2017 13:21:14 +0300 Subject: cs5536: add support for IDE controller variant Several legacy devices such as Geode-based Cisco ASA appliances and DB800 development board do possess CS5536 IDE controller with different PCI id than existing one. Using pata_generic is not always feasible as at least DB800 requires MSR quirk from pata_cs5536 to be used with vendor firmware. Signed-off-by: Andrey Korolyov CC: stable@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c71e532da458..4adf6161ec77 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -576,6 +576,7 @@ #define PCI_DEVICE_ID_AMD_CS5536_EHC 0x2095 #define PCI_DEVICE_ID_AMD_CS5536_UDC 0x2096 #define PCI_DEVICE_ID_AMD_CS5536_UOC 0x2097 +#define PCI_DEVICE_ID_AMD_CS5536_DEV_IDE 0x2092 #define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A #define PCI_DEVICE_ID_AMD_LX_VIDEO 0x2081 #define PCI_DEVICE_ID_AMD_LX_AES 0x2082 -- cgit From 861932ecc36063196c80ca3e240502b0f6d0e977 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:31 +0200 Subject: net: sched: Add helpers to identify classids Offloading drivers need to understand what qdisc class a filter is added to. Currently they only need to identify ingress, clsact->ingress and clsact->egress. So provide these helpers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2579c209ea51..259bc191ba59 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #define DEFAULT_TX_QUEUE_LEN 1000 @@ -132,4 +133,17 @@ static inline unsigned int psched_mtu(const struct net_device *dev) return dev->mtu + dev->hard_header_len; } +static inline bool is_classid_clsact_ingress(u32 classid) +{ + /* This also returns true for ingress qdisc */ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS); +} + +static inline bool is_classid_clsact_egress(u32 classid) +{ + return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) && + TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS); +} + #endif -- cgit From 7690f2a51d8afe51ac97e7fae66b081f192a7158 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:32 +0200 Subject: net: sched: propagate classid down to offload drivers Drivers need classid to decide they support this specific qdisc+class or not. So propagate it down via the tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f78e6560b2d..1f1de20e584f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -410,6 +410,7 @@ struct tc_cls_common_offload { u32 chain_index; __be16 protocol; u32 prio; + u32 classid; }; static inline void @@ -420,6 +421,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; + cls_common->classid = tp->classid; } struct tc_cls_u32_knode { -- cgit From 237f79d24ebe1eb9b5651b7342ba5cc9d9b8f222 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:34 +0200 Subject: net: sched: remove handle propagation down to the drivers There is no longer need to use handle in drivers, so remove it from tc_cls_common_offload struct. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 1f1de20e584f..bd9dd79357fe 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -406,7 +406,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) #endif /* CONFIG_NET_CLS_IND */ struct tc_cls_common_offload { - u32 handle; u32 chain_index; __be16 protocol; u32 prio; @@ -417,7 +416,6 @@ static inline void tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, const struct tcf_proto *tp) { - cls_common->handle = tp->q->handle; cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; -- cgit From 7b06e8aed283081010596c98a67f06c595affe51 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 9 Aug 2017 14:30:35 +0200 Subject: net: sched: remove cops->tcf_cl_offload cops->tcf_cl_offload is no longer needed, as the drivers check what they can and cannot offload using the classid identify helpers. So remove this. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 14 +++----------- include/net/sch_generic.h | 1 - 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index bd9dd79357fe..e80edd8879ef 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -457,19 +457,12 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_can_offload(const struct net_device *dev, - const struct tcf_proto *tp) +static inline bool tc_can_offload(const struct net_device *dev) { - const struct Qdisc *sch = tp->q; - const struct Qdisc_class_ops *cops = sch->ops->cl_ops; - if (!(dev->features & NETIF_F_HW_TC)) return false; if (!dev->netdev_ops->ndo_setup_tc) return false; - if (cops && cops->tcf_cl_offload) - return cops->tcf_cl_offload(tp->classid); - return true; } @@ -478,12 +471,11 @@ static inline bool tc_skip_hw(u32 flags) return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; } -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, u32 flags) { if (tc_skip_hw(flags)) return false; - return tc_can_offload(dev, tp); + return tc_can_offload(dev); } static inline bool tc_skip_sw(u32 flags) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e79f5ad1c5f3..5865db91976b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -156,7 +156,6 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long); - bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); -- cgit From ad729bc9acfb7c47112964b4877ef5404578ed13 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Thu, 10 Aug 2017 06:41:44 +0200 Subject: bonding: require speed/duplex only for 802.3ad, alb and tlb The patch c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") puts the link state to down if bond_update_speed_duplex() cannot retrieve speed and duplex settings. Assumably the patch was written with 802.3ad mode in mind which relies on link speed/duplex settings. For other modes like active-backup these settings are not required. Thus, only for these other modes, this patch reintroduces support for slaves that do not support reporting speed or duplex such as wireless devices. This fixes the regression reported in bug 196547 (https://bugzilla.kernel.org/show_bug.cgi?id=196547). Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") Signed-off-by: Andreas Born Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/bonding.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index b00508d22e0a..b2e68657a216 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -277,6 +277,11 @@ static inline bool bond_is_lb(const struct bonding *bond) BOND_MODE(bond) == BOND_MODE_ALB; } +static inline bool bond_needs_speed_duplex(const struct bonding *bond) +{ + return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); +} + static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_TLB) && -- cgit From e4dde4127396f0c8f1c2e11b3ecc5baf4f8628bf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:24 +0200 Subject: net: fix compilation when busy poll is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIN_NAPI_ID is used in various places outside of CONFIG_NET_RX_BUSY_POLL wrapping, so when it's not set we run into build errors such as: net/core/dev.c: In function 'dev_get_by_napi_id': net/core/dev.c:886:16: error: ‘MIN_NAPI_ID’ undeclared (first use in this function) if (napi_id < MIN_NAPI_ID) ^~~~~~~~~~~ Thus, have MIN_NAPI_ID always defined to fix these errors. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/busy_poll.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8ffd434676b7..71c72a939bf8 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -29,18 +29,18 @@ #include #include -#ifdef CONFIG_NET_RX_BUSY_POLL - -struct napi_struct; -extern unsigned int sysctl_net_busy_read __read_mostly; -extern unsigned int sysctl_net_busy_poll __read_mostly; - /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +#ifdef CONFIG_NET_RX_BUSY_POLL + +struct napi_struct; +extern unsigned int sysctl_net_busy_read __read_mostly; +extern unsigned int sysctl_net_busy_poll __read_mostly; + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; -- cgit From fd851ba9caa9a63fdbb72a2e6ed5560c0989e999 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Aug 2017 10:48:53 -0700 Subject: udp: harden copy_linear_skb() syzkaller got crashes with CONFIG_HARDENED_USERCOPY=y configs. Issue here is that recvfrom() can be used with user buffer of Z bytes, and SO_PEEK_OFF of X bytes, from a skb with Y bytes, and following condition : Z < X < Y kernel BUG at mm/usercopy.c:72! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 2917 Comm: syzkaller842281 Not tainted 4.13.0-rc3+ #16 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d2fa40c0 task.stack: ffff8801d1fe8000 RIP: 0010:report_usercopy mm/usercopy.c:64 [inline] RIP: 0010:__check_object_size+0x3ad/0x500 mm/usercopy.c:264 RSP: 0018:ffff8801d1fef8a8 EFLAGS: 00010286 RAX: 0000000000000078 RBX: ffffffff847102c0 RCX: 0000000000000000 RDX: 0000000000000078 RSI: 1ffff1003a3fded5 RDI: ffffed003a3fdf09 RBP: ffff8801d1fef998 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d1ea480e R13: fffffffffffffffa R14: ffffffff84710280 R15: dffffc0000000000 FS: 0000000001360880(0000) GS:ffff8801dc000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000202ecfe4 CR3: 00000001d1ff8000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_to_iter include/linux/uio.h:105 [inline] copy_linear_skb include/net/udp.h:371 [inline] udpv6_recvmsg+0x1040/0x1af0 net/ipv6/udp.c:395 inet_recvmsg+0x14c/0x5f0 net/ipv4/af_inet.c:793 sock_recvmsg_nosec net/socket.c:792 [inline] sock_recvmsg+0xc9/0x110 net/socket.c:799 SYSC_recvfrom+0x2d6/0x570 net/socket.c:1788 SyS_recvfrom+0x40/0x50 net/socket.c:1760 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..e9b1d1eacb59 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -368,6 +368,8 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, { int n, copy = len - off; + if (copy < 0) + return -EINVAL; n = copy_to_iter(skb->data + off, copy, to); if (n == copy) return 0; -- cgit From 45fff14bab00eea97e0f60a2a5ba3eca97bd476a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 3 Aug 2017 11:14:13 +0200 Subject: iio: trigger: stm32-timer: add support for STM32H7 Add support for STM32H7 timer triggers: - Add new valids_table - Introduce compatible, with configuration data - Extend up to 15 timers, available on STM32H7 Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-timer-trigger.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h index fa7d786ed99e..20f465353a7c 100644 --- a/include/linux/iio/timer/stm32-timer-trigger.h +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -59,6 +59,8 @@ #define TIM12_CH1 "tim12_ch1" #define TIM12_CH2 "tim12_ch2" +#define TIM15_TRGO "tim15_trgo" + bool is_stm32_timer_trigger(struct iio_trigger *trig); #endif -- cgit From 25892d6eac9cb752af95bcd539ed5e1158eba760 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 3 Aug 2017 11:14:14 +0200 Subject: iio: trigger: stm32-timer: add output compare triggers Add output compare trigger sources available on some instances. Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-timer-trigger.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h index 20f465353a7c..d68add80ab86 100644 --- a/include/linux/iio/timer/stm32-timer-trigger.h +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -55,12 +55,24 @@ #define TIM9_CH1 "tim9_ch1" #define TIM9_CH2 "tim9_ch2" +#define TIM10_OC1 "tim10_oc1" + +#define TIM11_OC1 "tim11_oc1" + #define TIM12_TRGO "tim12_trgo" #define TIM12_CH1 "tim12_ch1" #define TIM12_CH2 "tim12_ch2" +#define TIM13_OC1 "tim13_oc1" + +#define TIM14_OC1 "tim14_oc1" + #define TIM15_TRGO "tim15_trgo" +#define TIM16_OC1 "tim16_oc1" + +#define TIM17_OC1 "tim17_oc1" + bool is_stm32_timer_trigger(struct iio_trigger *trig); #endif -- cgit From d4092d76a4a4e57b65910899948a83cc8646c5a5 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 Aug 2017 17:29:10 +0200 Subject: mtd: nand: Rename nand.h into rawnand.h We are planning to share more code between different NAND based devices (SPI NAND, OneNAND and raw NANDs), but before doing that we need to move the existing include/linux/mtd/nand.h file into include/linux/mtd/rawnand.h so we can later create a nand.h header containing all common structure and function prototypes. Signed-off-by: Boris Brezillon Signed-off-by: Peter Pan Acked-by: Vladimir Zapolskiy Acked-by: Alexander Sverdlin Acked-by: Wenyou Yang Acked-by: Krzysztof Kozlowski Acked-by: Han Xu Acked-by: H Hartley Sweeten Acked-by: Shawn Guo Acked-by: Gregory CLEMENT Acked-by: Neil Armstrong Acked-by: Masahiro Yamada Acked-By: Harvey Hunt Acked-by: Tony Lindgren Acked-by: Krzysztof Halasa --- include/linux/mtd/nand-gpio.h | 2 +- include/linux/mtd/nand.h | 1331 ------------------------ include/linux/mtd/rawnand.h | 1329 +++++++++++++++++++++++ include/linux/mtd/sh_flctl.h | 2 +- include/linux/mtd/sharpsl.h | 2 +- include/linux/platform_data/mtd-davinci.h | 2 +- include/linux/platform_data/mtd-nand-s3c2410.h | 2 +- 7 files changed, 1334 insertions(+), 1336 deletions(-) delete mode 100644 include/linux/mtd/nand.h create mode 100644 include/linux/mtd/rawnand.h (limited to 'include') diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h index 51534e50f7fc..be4f45d89be2 100644 --- a/include/linux/mtd/nand-gpio.h +++ b/include/linux/mtd/nand-gpio.h @@ -1,7 +1,7 @@ #ifndef __LINUX_MTD_NAND_GPIO_H #define __LINUX_MTD_NAND_GPIO_H -#include +#include struct gpio_nand_platdata { int gpio_nce; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h deleted file mode 100644 index 892148c448cc..000000000000 --- a/include/linux/mtd/nand.h +++ /dev/null @@ -1,1331 +0,0 @@ -/* - * linux/include/linux/mtd/nand.h - * - * Copyright © 2000-2010 David Woodhouse - * Steven J. Hill - * Thomas Gleixner - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Info: - * Contains standard defines and IDs for NAND flash devices - * - * Changelog: - * See git changelog. - */ -#ifndef __LINUX_MTD_NAND_H -#define __LINUX_MTD_NAND_H - -#include -#include -#include -#include -#include - -struct mtd_info; -struct nand_flash_dev; -struct device_node; - -/* Scan and identify a NAND device */ -int nand_scan(struct mtd_info *mtd, int max_chips); -/* - * Separate phases of nand_scan(), allowing board driver to intervene - * and override command or ECC setup according to flash type. - */ -int nand_scan_ident(struct mtd_info *mtd, int max_chips, - struct nand_flash_dev *table); -int nand_scan_tail(struct mtd_info *mtd); - -/* Unregister the MTD device and free resources held by the NAND device */ -void nand_release(struct mtd_info *mtd); - -/* Internal helper for board drivers which need to override command function */ -void nand_wait_ready(struct mtd_info *mtd); - -/* locks all blocks present in the device */ -int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); - -/* unlocks specified locked blocks */ -int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); - -/* The maximum number of NAND chips in an array */ -#define NAND_MAX_CHIPS 8 - -/* - * Constants for hardware specific CLE/ALE/NCE function - * - * These are bits which can be or'ed to set/clear multiple - * bits in one go. - */ -/* Select the chip by setting nCE to low */ -#define NAND_NCE 0x01 -/* Select the command latch by setting CLE to high */ -#define NAND_CLE 0x02 -/* Select the address latch by setting ALE to high */ -#define NAND_ALE 0x04 - -#define NAND_CTRL_CLE (NAND_NCE | NAND_CLE) -#define NAND_CTRL_ALE (NAND_NCE | NAND_ALE) -#define NAND_CTRL_CHANGE 0x80 - -/* - * Standard NAND flash commands - */ -#define NAND_CMD_READ0 0 -#define NAND_CMD_READ1 1 -#define NAND_CMD_RNDOUT 5 -#define NAND_CMD_PAGEPROG 0x10 -#define NAND_CMD_READOOB 0x50 -#define NAND_CMD_ERASE1 0x60 -#define NAND_CMD_STATUS 0x70 -#define NAND_CMD_SEQIN 0x80 -#define NAND_CMD_RNDIN 0x85 -#define NAND_CMD_READID 0x90 -#define NAND_CMD_ERASE2 0xd0 -#define NAND_CMD_PARAM 0xec -#define NAND_CMD_GET_FEATURES 0xee -#define NAND_CMD_SET_FEATURES 0xef -#define NAND_CMD_RESET 0xff - -#define NAND_CMD_LOCK 0x2a -#define NAND_CMD_UNLOCK1 0x23 -#define NAND_CMD_UNLOCK2 0x24 - -/* Extended commands for large page devices */ -#define NAND_CMD_READSTART 0x30 -#define NAND_CMD_RNDOUTSTART 0xE0 -#define NAND_CMD_CACHEDPROG 0x15 - -#define NAND_CMD_NONE -1 - -/* Status bits */ -#define NAND_STATUS_FAIL 0x01 -#define NAND_STATUS_FAIL_N1 0x02 -#define NAND_STATUS_TRUE_READY 0x20 -#define NAND_STATUS_READY 0x40 -#define NAND_STATUS_WP 0x80 - -#define NAND_DATA_IFACE_CHECK_ONLY -1 - -/* - * Constants for ECC_MODES - */ -typedef enum { - NAND_ECC_NONE, - NAND_ECC_SOFT, - NAND_ECC_HW, - NAND_ECC_HW_SYNDROME, - NAND_ECC_HW_OOB_FIRST, - NAND_ECC_ON_DIE, -} nand_ecc_modes_t; - -enum nand_ecc_algo { - NAND_ECC_UNKNOWN, - NAND_ECC_HAMMING, - NAND_ECC_BCH, -}; - -/* - * Constants for Hardware ECC - */ -/* Reset Hardware ECC for read */ -#define NAND_ECC_READ 0 -/* Reset Hardware ECC for write */ -#define NAND_ECC_WRITE 1 -/* Enable Hardware ECC before syndrome is read back from flash */ -#define NAND_ECC_READSYN 2 - -/* - * Enable generic NAND 'page erased' check. This check is only done when - * ecc.correct() returns -EBADMSG. - * Set this flag if your implementation does not fix bitflips in erased - * pages and you want to rely on the default implementation. - */ -#define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) -#define NAND_ECC_MAXIMIZE BIT(1) -/* - * If your controller already sends the required NAND commands when - * reading or writing a page, then the framework is not supposed to - * send READ0 and SEQIN/PAGEPROG respectively. - */ -#define NAND_ECC_CUSTOM_PAGE_ACCESS BIT(2) - -/* Bit mask for flags passed to do_nand_read_ecc */ -#define NAND_GET_DEVICE 0x80 - - -/* - * Option constants for bizarre disfunctionality and real - * features. - */ -/* Buswidth is 16 bit */ -#define NAND_BUSWIDTH_16 0x00000002 -/* Chip has cache program function */ -#define NAND_CACHEPRG 0x00000008 -/* - * Chip requires ready check on read (for auto-incremented sequential read). - * True only for small page devices; large page devices do not support - * autoincrement. - */ -#define NAND_NEED_READRDY 0x00000100 - -/* Chip does not allow subpage writes */ -#define NAND_NO_SUBPAGE_WRITE 0x00000200 - -/* Device is one of 'new' xD cards that expose fake nand command set */ -#define NAND_BROKEN_XD 0x00000400 - -/* Device behaves just like nand, but is readonly */ -#define NAND_ROM 0x00000800 - -/* Device supports subpage reads */ -#define NAND_SUBPAGE_READ 0x00001000 - -/* - * Some MLC NANDs need data scrambling to limit bitflips caused by repeated - * patterns. - */ -#define NAND_NEED_SCRAMBLING 0x00002000 - -/* Options valid for Samsung large page devices */ -#define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG - -/* Macros to identify the above */ -#define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) -#define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ)) -#define NAND_HAS_SUBPAGE_WRITE(chip) !((chip)->options & NAND_NO_SUBPAGE_WRITE) - -/* Non chip related options */ -/* This option skips the bbt scan during initialization. */ -#define NAND_SKIP_BBTSCAN 0x00010000 -/* - * This option is defined if the board driver allocates its own buffers - * (e.g. because it needs them DMA-coherent). - */ -#define NAND_OWN_BUFFERS 0x00020000 -/* Chip may not exist, so silence any errors in scan */ -#define NAND_SCAN_SILENT_NODEV 0x00040000 -/* - * Autodetect nand buswidth with readid/onfi. - * This suppose the driver will configure the hardware in 8 bits mode - * when calling nand_scan_ident, and update its configuration - * before calling nand_scan_tail. - */ -#define NAND_BUSWIDTH_AUTO 0x00080000 -/* - * This option could be defined by controller drivers to protect against - * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers - */ -#define NAND_USE_BOUNCE_BUFFER 0x00100000 - -/* - * In case your controller is implementing ->cmd_ctrl() and is relying on the - * default ->cmdfunc() implementation, you may want to let the core handle the - * tCCS delay which is required when a column change (RNDIN or RNDOUT) is - * requested. - * If your controller already takes care of this delay, you don't need to set - * this flag. - */ -#define NAND_WAIT_TCCS 0x00200000 - -/* Options set by nand scan */ -/* Nand scan has allocated controller struct */ -#define NAND_CONTROLLER_ALLOC 0x80000000 - -/* Cell info constants */ -#define NAND_CI_CHIPNR_MSK 0x03 -#define NAND_CI_CELLTYPE_MSK 0x0C -#define NAND_CI_CELLTYPE_SHIFT 2 - -/* Keep gcc happy */ -struct nand_chip; - -/* ONFI features */ -#define ONFI_FEATURE_16_BIT_BUS (1 << 0) -#define ONFI_FEATURE_EXT_PARAM_PAGE (1 << 7) - -/* ONFI timing mode, used in both asynchronous and synchronous mode */ -#define ONFI_TIMING_MODE_0 (1 << 0) -#define ONFI_TIMING_MODE_1 (1 << 1) -#define ONFI_TIMING_MODE_2 (1 << 2) -#define ONFI_TIMING_MODE_3 (1 << 3) -#define ONFI_TIMING_MODE_4 (1 << 4) -#define ONFI_TIMING_MODE_5 (1 << 5) -#define ONFI_TIMING_MODE_UNKNOWN (1 << 6) - -/* ONFI feature address */ -#define ONFI_FEATURE_ADDR_TIMING_MODE 0x1 - -/* Vendor-specific feature address (Micron) */ -#define ONFI_FEATURE_ADDR_READ_RETRY 0x89 -#define ONFI_FEATURE_ON_DIE_ECC 0x90 -#define ONFI_FEATURE_ON_DIE_ECC_EN BIT(3) - -/* ONFI subfeature parameters length */ -#define ONFI_SUBFEATURE_PARAM_LEN 4 - -/* ONFI optional commands SET/GET FEATURES supported? */ -#define ONFI_OPT_CMD_SET_GET_FEATURES (1 << 2) - -struct nand_onfi_params { - /* rev info and features block */ - /* 'O' 'N' 'F' 'I' */ - u8 sig[4]; - __le16 revision; - __le16 features; - __le16 opt_cmd; - u8 reserved0[2]; - __le16 ext_param_page_length; /* since ONFI 2.1 */ - u8 num_of_param_pages; /* since ONFI 2.1 */ - u8 reserved1[17]; - - /* manufacturer information block */ - char manufacturer[12]; - char model[20]; - u8 jedec_id; - __le16 date_code; - u8 reserved2[13]; - - /* memory organization block */ - __le32 byte_per_page; - __le16 spare_bytes_per_page; - __le32 data_bytes_per_ppage; - __le16 spare_bytes_per_ppage; - __le32 pages_per_block; - __le32 blocks_per_lun; - u8 lun_count; - u8 addr_cycles; - u8 bits_per_cell; - __le16 bb_per_lun; - __le16 block_endurance; - u8 guaranteed_good_blocks; - __le16 guaranteed_block_endurance; - u8 programs_per_page; - u8 ppage_attr; - u8 ecc_bits; - u8 interleaved_bits; - u8 interleaved_ops; - u8 reserved3[13]; - - /* electrical parameter block */ - u8 io_pin_capacitance_max; - __le16 async_timing_mode; - __le16 program_cache_timing_mode; - __le16 t_prog; - __le16 t_bers; - __le16 t_r; - __le16 t_ccs; - __le16 src_sync_timing_mode; - u8 src_ssync_features; - __le16 clk_pin_capacitance_typ; - __le16 io_pin_capacitance_typ; - __le16 input_pin_capacitance_typ; - u8 input_pin_capacitance_max; - u8 driver_strength_support; - __le16 t_int_r; - __le16 t_adl; - u8 reserved4[8]; - - /* vendor */ - __le16 vendor_revision; - u8 vendor[88]; - - __le16 crc; -} __packed; - -#define ONFI_CRC_BASE 0x4F4E - -/* Extended ECC information Block Definition (since ONFI 2.1) */ -struct onfi_ext_ecc_info { - u8 ecc_bits; - u8 codeword_size; - __le16 bb_per_lun; - __le16 block_endurance; - u8 reserved[2]; -} __packed; - -#define ONFI_SECTION_TYPE_0 0 /* Unused section. */ -#define ONFI_SECTION_TYPE_1 1 /* for additional sections. */ -#define ONFI_SECTION_TYPE_2 2 /* for ECC information. */ -struct onfi_ext_section { - u8 type; - u8 length; -} __packed; - -#define ONFI_EXT_SECTION_MAX 8 - -/* Extended Parameter Page Definition (since ONFI 2.1) */ -struct onfi_ext_param_page { - __le16 crc; - u8 sig[4]; /* 'E' 'P' 'P' 'S' */ - u8 reserved0[10]; - struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX]; - - /* - * The actual size of the Extended Parameter Page is in - * @ext_param_page_length of nand_onfi_params{}. - * The following are the variable length sections. - * So we do not add any fields below. Please see the ONFI spec. - */ -} __packed; - -struct jedec_ecc_info { - u8 ecc_bits; - u8 codeword_size; - __le16 bb_per_lun; - __le16 block_endurance; - u8 reserved[2]; -} __packed; - -/* JEDEC features */ -#define JEDEC_FEATURE_16_BIT_BUS (1 << 0) - -struct nand_jedec_params { - /* rev info and features block */ - /* 'J' 'E' 'S' 'D' */ - u8 sig[4]; - __le16 revision; - __le16 features; - u8 opt_cmd[3]; - __le16 sec_cmd; - u8 num_of_param_pages; - u8 reserved0[18]; - - /* manufacturer information block */ - char manufacturer[12]; - char model[20]; - u8 jedec_id[6]; - u8 reserved1[10]; - - /* memory organization block */ - __le32 byte_per_page; - __le16 spare_bytes_per_page; - u8 reserved2[6]; - __le32 pages_per_block; - __le32 blocks_per_lun; - u8 lun_count; - u8 addr_cycles; - u8 bits_per_cell; - u8 programs_per_page; - u8 multi_plane_addr; - u8 multi_plane_op_attr; - u8 reserved3[38]; - - /* electrical parameter block */ - __le16 async_sdr_speed_grade; - __le16 toggle_ddr_speed_grade; - __le16 sync_ddr_speed_grade; - u8 async_sdr_features; - u8 toggle_ddr_features; - u8 sync_ddr_features; - __le16 t_prog; - __le16 t_bers; - __le16 t_r; - __le16 t_r_multi_plane; - __le16 t_ccs; - __le16 io_pin_capacitance_typ; - __le16 input_pin_capacitance_typ; - __le16 clk_pin_capacitance_typ; - u8 driver_strength_support; - __le16 t_adl; - u8 reserved4[36]; - - /* ECC and endurance block */ - u8 guaranteed_good_blocks; - __le16 guaranteed_block_endurance; - struct jedec_ecc_info ecc_info[4]; - u8 reserved5[29]; - - /* reserved */ - u8 reserved6[148]; - - /* vendor */ - __le16 vendor_rev_num; - u8 reserved7[88]; - - /* CRC for Parameter Page */ - __le16 crc; -} __packed; - -/** - * struct nand_id - NAND id structure - * @data: buffer containing the id bytes. Currently 8 bytes large, but can - * be extended if required. - * @len: ID length. - */ -struct nand_id { - u8 data[8]; - int len; -}; - -/** - * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices - * @lock: protection lock - * @active: the mtd device which holds the controller currently - * @wq: wait queue to sleep on if a NAND operation is in - * progress used instead of the per chip wait queue - * when a hw controller is available. - */ -struct nand_hw_control { - spinlock_t lock; - struct nand_chip *active; - wait_queue_head_t wq; -}; - -static inline void nand_hw_control_init(struct nand_hw_control *nfc) -{ - nfc->active = NULL; - spin_lock_init(&nfc->lock); - init_waitqueue_head(&nfc->wq); -} - -/** - * struct nand_ecc_step_info - ECC step information of ECC engine - * @stepsize: data bytes per ECC step - * @strengths: array of supported strengths - * @nstrengths: number of supported strengths - */ -struct nand_ecc_step_info { - int stepsize; - const int *strengths; - int nstrengths; -}; - -/** - * struct nand_ecc_caps - capability of ECC engine - * @stepinfos: array of ECC step information - * @nstepinfos: number of ECC step information - * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step - */ -struct nand_ecc_caps { - const struct nand_ecc_step_info *stepinfos; - int nstepinfos; - int (*calc_ecc_bytes)(int step_size, int strength); -}; - -/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */ -#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...) \ -static const int __name##_strengths[] = { __VA_ARGS__ }; \ -static const struct nand_ecc_step_info __name##_stepinfo = { \ - .stepsize = __step, \ - .strengths = __name##_strengths, \ - .nstrengths = ARRAY_SIZE(__name##_strengths), \ -}; \ -static const struct nand_ecc_caps __name = { \ - .stepinfos = &__name##_stepinfo, \ - .nstepinfos = 1, \ - .calc_ecc_bytes = __calc, \ -} - -/** - * struct nand_ecc_ctrl - Control structure for ECC - * @mode: ECC mode - * @algo: ECC algorithm - * @steps: number of ECC steps per page - * @size: data bytes per ECC step - * @bytes: ECC bytes per step - * @strength: max number of correctible bits per ECC step - * @total: total number of ECC bytes per page - * @prepad: padding information for syndrome based ECC generators - * @postpad: padding information for syndrome based ECC generators - * @options: ECC specific options (see NAND_ECC_XXX flags defined above) - * @priv: pointer to private ECC control data - * @hwctl: function to control hardware ECC generator. Must only - * be provided if an hardware ECC is available - * @calculate: function for ECC calculation or readback from ECC hardware - * @correct: function for ECC correction, matching to ECC generator (sw/hw). - * Should return a positive number representing the number of - * corrected bitflips, -EBADMSG if the number of bitflips exceed - * ECC strength, or any other error code if the error is not - * directly related to correction. - * If -EBADMSG is returned the input buffers should be left - * untouched. - * @read_page_raw: function to read a raw page without ECC. This function - * should hide the specific layout used by the ECC - * controller and always return contiguous in-band and - * out-of-band data even if they're not stored - * contiguously on the NAND chip (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and - * out-of-band data). - * @write_page_raw: function to write a raw page without ECC. This function - * should hide the specific layout used by the ECC - * controller and consider the passed data as contiguous - * in-band and out-of-band data. ECC controller is - * responsible for doing the appropriate transformations - * to adapt to its specific layout (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and - * out-of-band data). - * @read_page: function to read a page according to the ECC generator - * requirements; returns maximum number of bitflips corrected in - * any single ECC step, -EIO hw error - * @read_subpage: function to read parts of the page covered by ECC; - * returns same as read_page() - * @write_subpage: function to write parts of the page covered by ECC. - * @write_page: function to write a page according to the ECC generator - * requirements. - * @write_oob_raw: function to write chip OOB data without ECC - * @read_oob_raw: function to read chip OOB data without ECC - * @read_oob: function to read chip OOB data - * @write_oob: function to write chip OOB data - */ -struct nand_ecc_ctrl { - nand_ecc_modes_t mode; - enum nand_ecc_algo algo; - int steps; - int size; - int bytes; - int total; - int strength; - int prepad; - int postpad; - unsigned int options; - void *priv; - void (*hwctl)(struct mtd_info *mtd, int mode); - int (*calculate)(struct mtd_info *mtd, const uint8_t *dat, - uint8_t *ecc_code); - int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc, - uint8_t *calc_ecc); - int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int oob_required, int page); - int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required, int page); - int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int oob_required, int page); - int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offs, uint32_t len, uint8_t *buf, int page); - int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offset, uint32_t data_len, - const uint8_t *data_buf, int oob_required, int page); - int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required, int page); - int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, - int page); - int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, - int page); - int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page); - int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip, - int page); -}; - -static inline int nand_standard_page_accessors(struct nand_ecc_ctrl *ecc) -{ - return !(ecc->options & NAND_ECC_CUSTOM_PAGE_ACCESS); -} - -/** - * struct nand_buffers - buffer structure for read/write - * @ecccalc: buffer pointer for calculated ECC, size is oobsize. - * @ecccode: buffer pointer for ECC read from flash, size is oobsize. - * @databuf: buffer pointer for data, size is (page size + oobsize). - * - * Do not change the order of buffers. databuf and oobrbuf must be in - * consecutive order. - */ -struct nand_buffers { - uint8_t *ecccalc; - uint8_t *ecccode; - uint8_t *databuf; -}; - -/** - * struct nand_sdr_timings - SDR NAND chip timings - * - * This struct defines the timing requirements of a SDR NAND chip. - * These information can be found in every NAND datasheets and the timings - * meaning are described in the ONFI specifications: - * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing - * Parameters) - * - * All these timings are expressed in picoseconds. - * - * @tBERS_max: Block erase time - * @tCCS_min: Change column setup time - * @tPROG_max: Page program time - * @tR_max: Page read time - * @tALH_min: ALE hold time - * @tADL_min: ALE to data loading time - * @tALS_min: ALE setup time - * @tAR_min: ALE to RE# delay - * @tCEA_max: CE# access time - * @tCEH_min: CE# high hold time - * @tCH_min: CE# hold time - * @tCHZ_max: CE# high to output hi-Z - * @tCLH_min: CLE hold time - * @tCLR_min: CLE to RE# delay - * @tCLS_min: CLE setup time - * @tCOH_min: CE# high to output hold - * @tCS_min: CE# setup time - * @tDH_min: Data hold time - * @tDS_min: Data setup time - * @tFEAT_max: Busy time for Set Features and Get Features - * @tIR_min: Output hi-Z to RE# low - * @tITC_max: Interface and Timing Mode Change time - * @tRC_min: RE# cycle time - * @tREA_max: RE# access time - * @tREH_min: RE# high hold time - * @tRHOH_min: RE# high to output hold - * @tRHW_min: RE# high to WE# low - * @tRHZ_max: RE# high to output hi-Z - * @tRLOH_min: RE# low to output hold - * @tRP_min: RE# pulse width - * @tRR_min: Ready to RE# low (data only) - * @tRST_max: Device reset time, measured from the falling edge of R/B# to the - * rising edge of R/B#. - * @tWB_max: WE# high to SR[6] low - * @tWC_min: WE# cycle time - * @tWH_min: WE# high hold time - * @tWHR_min: WE# high to RE# low - * @tWP_min: WE# pulse width - * @tWW_min: WP# transition to WE# low - */ -struct nand_sdr_timings { - u32 tBERS_max; - u32 tCCS_min; - u32 tPROG_max; - u32 tR_max; - u32 tALH_min; - u32 tADL_min; - u32 tALS_min; - u32 tAR_min; - u32 tCEA_max; - u32 tCEH_min; - u32 tCH_min; - u32 tCHZ_max; - u32 tCLH_min; - u32 tCLR_min; - u32 tCLS_min; - u32 tCOH_min; - u32 tCS_min; - u32 tDH_min; - u32 tDS_min; - u32 tFEAT_max; - u32 tIR_min; - u32 tITC_max; - u32 tRC_min; - u32 tREA_max; - u32 tREH_min; - u32 tRHOH_min; - u32 tRHW_min; - u32 tRHZ_max; - u32 tRLOH_min; - u32 tRP_min; - u32 tRR_min; - u64 tRST_max; - u32 tWB_max; - u32 tWC_min; - u32 tWH_min; - u32 tWHR_min; - u32 tWP_min; - u32 tWW_min; -}; - -/** - * enum nand_data_interface_type - NAND interface timing type - * @NAND_SDR_IFACE: Single Data Rate interface - */ -enum nand_data_interface_type { - NAND_SDR_IFACE, -}; - -/** - * struct nand_data_interface - NAND interface timing - * @type: type of the timing - * @timings: The timing, type according to @type - */ -struct nand_data_interface { - enum nand_data_interface_type type; - union { - struct nand_sdr_timings sdr; - } timings; -}; - -/** - * nand_get_sdr_timings - get SDR timing from data interface - * @conf: The data interface - */ -static inline const struct nand_sdr_timings * -nand_get_sdr_timings(const struct nand_data_interface *conf) -{ - if (conf->type != NAND_SDR_IFACE) - return ERR_PTR(-EINVAL); - - return &conf->timings.sdr; -} - -/** - * struct nand_manufacturer_ops - NAND Manufacturer operations - * @detect: detect the NAND memory organization and capabilities - * @init: initialize all vendor specific fields (like the ->read_retry() - * implementation) if any. - * @cleanup: the ->init() function may have allocated resources, ->cleanup() - * is here to let vendor specific code release those resources. - */ -struct nand_manufacturer_ops { - void (*detect)(struct nand_chip *chip); - int (*init)(struct nand_chip *chip); - void (*cleanup)(struct nand_chip *chip); -}; - -/** - * struct nand_chip - NAND Private Flash Chip Data - * @mtd: MTD device registered to the MTD framework - * @IO_ADDR_R: [BOARDSPECIFIC] address to read the 8 I/O lines of the - * flash device - * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the - * flash device. - * @read_byte: [REPLACEABLE] read one byte from the chip - * @read_word: [REPLACEABLE] read one word from the chip - * @write_byte: [REPLACEABLE] write a single byte to the chip on the - * low 8 I/O lines - * @write_buf: [REPLACEABLE] write data from the buffer to the chip - * @read_buf: [REPLACEABLE] read data from the chip into the buffer - * @select_chip: [REPLACEABLE] select chip nr - * @block_bad: [REPLACEABLE] check if a block is bad, using OOB markers - * @block_markbad: [REPLACEABLE] mark a block bad - * @cmd_ctrl: [BOARDSPECIFIC] hardwarespecific function for controlling - * ALE/CLE/nCE. Also used to write command and address - * @dev_ready: [BOARDSPECIFIC] hardwarespecific function for accessing - * device ready/busy line. If set to NULL no access to - * ready/busy is available and the ready/busy information - * is read from the chip status register. - * @cmdfunc: [REPLACEABLE] hardwarespecific function for writing - * commands to the chip. - * @waitfunc: [REPLACEABLE] hardwarespecific function for wait on - * ready. - * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for - * setting the read-retry mode. Mostly needed for MLC NAND. - * @ecc: [BOARDSPECIFIC] ECC control structure - * @buffers: buffer structure for read/write - * @buf_align: minimum buffer alignment required by a platform - * @hwcontrol: platform-specific hardware control structure - * @erase: [REPLACEABLE] erase function - * @scan_bbt: [REPLACEABLE] function to scan bad block table - * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring - * data from array to read regs (tR). - * @state: [INTERN] the current state of the NAND device - * @oob_poi: "poison value buffer," used for laying out OOB data - * before writing - * @page_shift: [INTERN] number of address bits in a page (column - * address bits). - * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock - * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry - * @chip_shift: [INTERN] number of address bits in one chip - * @options: [BOARDSPECIFIC] various chip options. They can partly - * be set to inform nand_scan about special functionality. - * See the defines for further explanation. - * @bbt_options: [INTERN] bad block specific options. All options used - * here must come from bbm.h. By default, these options - * will be copied to the appropriate nand_bbt_descr's. - * @badblockpos: [INTERN] position of the bad block marker in the oob - * area. - * @badblockbits: [INTERN] minimum number of set bits in a good block's - * bad block marker position; i.e., BBM == 11110111b is - * not bad when badblockbits == 7 - * @bits_per_cell: [INTERN] number of bits per cell. i.e., 1 means SLC. - * @ecc_strength_ds: [INTERN] ECC correctability from the datasheet. - * Minimum amount of bit errors per @ecc_step_ds guaranteed - * to be correctable. If unknown, set to zero. - * @ecc_step_ds: [INTERN] ECC step required by the @ecc_strength_ds, - * also from the datasheet. It is the recommended ECC step - * size, if known; if unknown, set to zero. - * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is - * set to the actually used ONFI mode if the chip is - * ONFI compliant or deduced from the datasheet if - * the NAND chip is not ONFI compliant. - * @numchips: [INTERN] number of physical chips - * @chipsize: [INTERN] the size of one chip for multichip arrays - * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 - * @pagebuf: [INTERN] holds the pagenumber which is currently in - * data_buf. - * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is - * currently in data_buf. - * @subpagesize: [INTERN] holds the subpagesize - * @id: [INTERN] holds NAND ID - * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), - * non 0 if ONFI supported. - * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), - * non 0 if JEDEC supported. - * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is - * supported, 0 otherwise. - * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is - * supported, 0 otherwise. - * @max_bb_per_die: [INTERN] the max number of bad blocks each die of a - * this nand device will encounter their life times. - * @blocks_per_die: [INTERN] The number of PEBs in a die - * @data_interface: [INTERN] NAND interface timing information - * @read_retries: [INTERN] the number of read retry modes supported - * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand - * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand - * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If - * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this - * means the configuration should not be applied but - * only checked. - * @bbt: [INTERN] bad block table pointer - * @bbt_td: [REPLACEABLE] bad block table descriptor for flash - * lookup. - * @bbt_md: [REPLACEABLE] bad block table mirror descriptor - * @badblock_pattern: [REPLACEABLE] bad block scan pattern used for initial - * bad block scan. - * @controller: [REPLACEABLE] a pointer to a hardware controller - * structure which is shared among multiple independent - * devices. - * @priv: [OPTIONAL] pointer to private chip data - * @manufacturer: [INTERN] Contains manufacturer information - */ - -struct nand_chip { - struct mtd_info mtd; - void __iomem *IO_ADDR_R; - void __iomem *IO_ADDR_W; - - uint8_t (*read_byte)(struct mtd_info *mtd); - u16 (*read_word)(struct mtd_info *mtd); - void (*write_byte)(struct mtd_info *mtd, uint8_t byte); - void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); - void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); - void (*select_chip)(struct mtd_info *mtd, int chip); - int (*block_bad)(struct mtd_info *mtd, loff_t ofs); - int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); - void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); - int (*dev_ready)(struct mtd_info *mtd); - void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, - int page_addr); - int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); - int (*erase)(struct mtd_info *mtd, int page); - int (*scan_bbt)(struct mtd_info *mtd); - int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, - int feature_addr, uint8_t *subfeature_para); - int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, - int feature_addr, uint8_t *subfeature_para); - int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); - int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, - const struct nand_data_interface *conf); - - - int chip_delay; - unsigned int options; - unsigned int bbt_options; - - int page_shift; - int phys_erase_shift; - int bbt_erase_shift; - int chip_shift; - int numchips; - uint64_t chipsize; - int pagemask; - int pagebuf; - unsigned int pagebuf_bitflips; - int subpagesize; - uint8_t bits_per_cell; - uint16_t ecc_strength_ds; - uint16_t ecc_step_ds; - int onfi_timing_mode_default; - int badblockpos; - int badblockbits; - - struct nand_id id; - int onfi_version; - int jedec_version; - union { - struct nand_onfi_params onfi_params; - struct nand_jedec_params jedec_params; - }; - u16 max_bb_per_die; - u32 blocks_per_die; - - struct nand_data_interface *data_interface; - - int read_retries; - - flstate_t state; - - uint8_t *oob_poi; - struct nand_hw_control *controller; - - struct nand_ecc_ctrl ecc; - struct nand_buffers *buffers; - unsigned long buf_align; - struct nand_hw_control hwcontrol; - - uint8_t *bbt; - struct nand_bbt_descr *bbt_td; - struct nand_bbt_descr *bbt_md; - - struct nand_bbt_descr *badblock_pattern; - - void *priv; - - struct { - const struct nand_manufacturer *desc; - void *priv; - } manufacturer; -}; - -extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; -extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; - -static inline void nand_set_flash_node(struct nand_chip *chip, - struct device_node *np) -{ - mtd_set_of_node(&chip->mtd, np); -} - -static inline struct device_node *nand_get_flash_node(struct nand_chip *chip) -{ - return mtd_get_of_node(&chip->mtd); -} - -static inline struct nand_chip *mtd_to_nand(struct mtd_info *mtd) -{ - return container_of(mtd, struct nand_chip, mtd); -} - -static inline struct mtd_info *nand_to_mtd(struct nand_chip *chip) -{ - return &chip->mtd; -} - -static inline void *nand_get_controller_data(struct nand_chip *chip) -{ - return chip->priv; -} - -static inline void nand_set_controller_data(struct nand_chip *chip, void *priv) -{ - chip->priv = priv; -} - -static inline void nand_set_manufacturer_data(struct nand_chip *chip, - void *priv) -{ - chip->manufacturer.priv = priv; -} - -static inline void *nand_get_manufacturer_data(struct nand_chip *chip) -{ - return chip->manufacturer.priv; -} - -/* - * NAND Flash Manufacturer ID Codes - */ -#define NAND_MFR_TOSHIBA 0x98 -#define NAND_MFR_ESMT 0xc8 -#define NAND_MFR_SAMSUNG 0xec -#define NAND_MFR_FUJITSU 0x04 -#define NAND_MFR_NATIONAL 0x8f -#define NAND_MFR_RENESAS 0x07 -#define NAND_MFR_STMICRO 0x20 -#define NAND_MFR_HYNIX 0xad -#define NAND_MFR_MICRON 0x2c -#define NAND_MFR_AMD 0x01 -#define NAND_MFR_MACRONIX 0xc2 -#define NAND_MFR_EON 0x92 -#define NAND_MFR_SANDISK 0x45 -#define NAND_MFR_INTEL 0x89 -#define NAND_MFR_ATO 0x9b -#define NAND_MFR_WINBOND 0xef - -/* The maximum expected count of bytes in the NAND ID sequence */ -#define NAND_MAX_ID_LEN 8 - -/* - * A helper for defining older NAND chips where the second ID byte fully - * defined the chip, including the geometry (chip size, eraseblock size, page - * size). All these chips have 512 bytes NAND page size. - */ -#define LEGACY_ID_NAND(nm, devid, chipsz, erasesz, opts) \ - { .name = (nm), {{ .dev_id = (devid) }}, .pagesize = 512, \ - .chipsize = (chipsz), .erasesize = (erasesz), .options = (opts) } - -/* - * A helper for defining newer chips which report their page size and - * eraseblock size via the extended ID bytes. - * - * The real difference between LEGACY_ID_NAND and EXTENDED_ID_NAND is that with - * EXTENDED_ID_NAND, manufacturers overloaded the same device ID so that the - * device ID now only represented a particular total chip size (and voltage, - * buswidth), and the page size, eraseblock size, and OOB size could vary while - * using the same device ID. - */ -#define EXTENDED_ID_NAND(nm, devid, chipsz, opts) \ - { .name = (nm), {{ .dev_id = (devid) }}, .chipsize = (chipsz), \ - .options = (opts) } - -#define NAND_ECC_INFO(_strength, _step) \ - { .strength_ds = (_strength), .step_ds = (_step) } -#define NAND_ECC_STRENGTH(type) ((type)->ecc.strength_ds) -#define NAND_ECC_STEP(type) ((type)->ecc.step_ds) - -/** - * struct nand_flash_dev - NAND Flash Device ID Structure - * @name: a human-readable name of the NAND chip - * @dev_id: the device ID (the second byte of the full chip ID array) - * @mfr_id: manufecturer ID part of the full chip ID array (refers the same - * memory address as @id[0]) - * @dev_id: device ID part of the full chip ID array (refers the same memory - * address as @id[1]) - * @id: full device ID array - * @pagesize: size of the NAND page in bytes; if 0, then the real page size (as - * well as the eraseblock size) is determined from the extended NAND - * chip ID array) - * @chipsize: total chip size in MiB - * @erasesize: eraseblock size in bytes (determined from the extended ID if 0) - * @options: stores various chip bit options - * @id_len: The valid length of the @id. - * @oobsize: OOB size - * @ecc: ECC correctability and step information from the datasheet. - * @ecc.strength_ds: The ECC correctability from the datasheet, same as the - * @ecc_strength_ds in nand_chip{}. - * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the - * @ecc_step_ds in nand_chip{}, also from the datasheet. - * For example, the "4bit ECC for each 512Byte" can be set with - * NAND_ECC_INFO(4, 512). - * @onfi_timing_mode_default: the default ONFI timing mode entered after a NAND - * reset. Should be deduced from timings described - * in the datasheet. - * - */ -struct nand_flash_dev { - char *name; - union { - struct { - uint8_t mfr_id; - uint8_t dev_id; - }; - uint8_t id[NAND_MAX_ID_LEN]; - }; - unsigned int pagesize; - unsigned int chipsize; - unsigned int erasesize; - unsigned int options; - uint16_t id_len; - uint16_t oobsize; - struct { - uint16_t strength_ds; - uint16_t step_ds; - } ecc; - int onfi_timing_mode_default; -}; - -/** - * struct nand_manufacturer - NAND Flash Manufacturer structure - * @name: Manufacturer name - * @id: manufacturer ID code of device. - * @ops: manufacturer operations -*/ -struct nand_manufacturer { - int id; - char *name; - const struct nand_manufacturer_ops *ops; -}; - -const struct nand_manufacturer *nand_get_manufacturer(u8 id); - -static inline const char * -nand_manufacturer_name(const struct nand_manufacturer *manufacturer) -{ - return manufacturer ? manufacturer->name : "Unknown"; -} - -extern struct nand_flash_dev nand_flash_ids[]; - -extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; -extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; -extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; -extern const struct nand_manufacturer_ops micron_nand_manuf_ops; -extern const struct nand_manufacturer_ops amd_nand_manuf_ops; -extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; - -int nand_default_bbt(struct mtd_info *mtd); -int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); -int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); -int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, - int allowbbt); -int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, uint8_t *buf); - -/** - * struct platform_nand_chip - chip level device structure - * @nr_chips: max. number of chips to scan for - * @chip_offset: chip number offset - * @nr_partitions: number of partitions pointed to by partitions (or zero) - * @partitions: mtd partition list - * @chip_delay: R/B delay value in us - * @options: Option flags, e.g. 16bit buswidth - * @bbt_options: BBT option flags, e.g. NAND_BBT_USE_FLASH - * @part_probe_types: NULL-terminated array of probe types - */ -struct platform_nand_chip { - int nr_chips; - int chip_offset; - int nr_partitions; - struct mtd_partition *partitions; - int chip_delay; - unsigned int options; - unsigned int bbt_options; - const char **part_probe_types; -}; - -/* Keep gcc happy */ -struct platform_device; - -/** - * struct platform_nand_ctrl - controller level device structure - * @probe: platform specific function to probe/setup hardware - * @remove: platform specific function to remove/teardown hardware - * @hwcontrol: platform specific hardware control structure - * @dev_ready: platform specific function to read ready/busy pin - * @select_chip: platform specific chip select function - * @cmd_ctrl: platform specific function for controlling - * ALE/CLE/nCE. Also used to write command and address - * @write_buf: platform specific function for write buffer - * @read_buf: platform specific function for read buffer - * @read_byte: platform specific function to read one byte from chip - * @priv: private data to transport driver specific settings - * - * All fields are optional and depend on the hardware driver requirements - */ -struct platform_nand_ctrl { - int (*probe)(struct platform_device *pdev); - void (*remove)(struct platform_device *pdev); - void (*hwcontrol)(struct mtd_info *mtd, int cmd); - int (*dev_ready)(struct mtd_info *mtd); - void (*select_chip)(struct mtd_info *mtd, int chip); - void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); - void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); - void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); - unsigned char (*read_byte)(struct mtd_info *mtd); - void *priv; -}; - -/** - * struct platform_nand_data - container structure for platform-specific data - * @chip: chip level chip structure - * @ctrl: controller level device structure - */ -struct platform_nand_data { - struct platform_nand_chip chip; - struct platform_nand_ctrl ctrl; -}; - -/* return the supported features. */ -static inline int onfi_feature(struct nand_chip *chip) -{ - return chip->onfi_version ? le16_to_cpu(chip->onfi_params.features) : 0; -} - -/* return the supported asynchronous timing mode. */ -static inline int onfi_get_async_timing_mode(struct nand_chip *chip) -{ - if (!chip->onfi_version) - return ONFI_TIMING_MODE_UNKNOWN; - return le16_to_cpu(chip->onfi_params.async_timing_mode); -} - -/* return the supported synchronous timing mode. */ -static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) -{ - if (!chip->onfi_version) - return ONFI_TIMING_MODE_UNKNOWN; - return le16_to_cpu(chip->onfi_params.src_sync_timing_mode); -} - -int onfi_init_data_interface(struct nand_chip *chip, - struct nand_data_interface *iface, - enum nand_data_interface_type type, - int timing_mode); - -/* - * Check if it is a SLC nand. - * The !nand_is_slc() can be used to check the MLC/TLC nand chips. - * We do not distinguish the MLC and TLC now. - */ -static inline bool nand_is_slc(struct nand_chip *chip) -{ - return chip->bits_per_cell == 1; -} - -/** - * Check if the opcode's address should be sent only on the lower 8 bits - * @command: opcode to check - */ -static inline int nand_opcode_8bits(unsigned int command) -{ - switch (command) { - case NAND_CMD_READID: - case NAND_CMD_PARAM: - case NAND_CMD_GET_FEATURES: - case NAND_CMD_SET_FEATURES: - return 1; - default: - break; - } - return 0; -} - -/* return the supported JEDEC features. */ -static inline int jedec_feature(struct nand_chip *chip) -{ - return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) - : 0; -} - -/* get timing characteristics from ONFI timing mode. */ -const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); -/* get data interface from ONFI timing mode 0, used after reset. */ -const struct nand_data_interface *nand_get_default_data_interface(void); - -int nand_check_erased_ecc_chunk(void *data, int datalen, - void *ecc, int ecclen, - void *extraoob, int extraooblen, - int threshold); - -int nand_check_ecc_caps(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_match_ecc_req(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -int nand_maximize_ecc(struct nand_chip *chip, - const struct nand_ecc_caps *caps, int oobavail); - -/* Default write_oob implementation */ -int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); - -/* Default write_oob syndrome implementation */ -int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - int page); - -/* Default read_oob implementation */ -int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); - -/* Default read_oob syndrome implementation */ -int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, - int page); - -/* Stub used by drivers that do not support GET/SET FEATURES operations */ -int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd, - struct nand_chip *chip, int addr, - u8 *subfeature_param); - -/* Default read_page_raw implementation */ -int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int oob_required, int page); - -/* Default write_page_raw implementation */ -int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required, int page); - -/* Reset and initialize a NAND device */ -int nand_reset(struct nand_chip *chip, int chipnr); - -/* Free resources held by the NAND device */ -void nand_cleanup(struct nand_chip *chip); - -/* Default extended ID decoding function */ -void nand_decode_ext_id(struct nand_chip *chip); -#endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h new file mode 100644 index 000000000000..b371dc0914a7 --- /dev/null +++ b/include/linux/mtd/rawnand.h @@ -0,0 +1,1329 @@ +/* + * Copyright © 2000-2010 David Woodhouse + * Steven J. Hill + * Thomas Gleixner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Info: + * Contains standard defines and IDs for NAND flash devices + * + * Changelog: + * See git changelog. + */ +#ifndef __LINUX_MTD_RAWNAND_H +#define __LINUX_MTD_RAWNAND_H + +#include +#include +#include +#include +#include + +struct mtd_info; +struct nand_flash_dev; +struct device_node; + +/* Scan and identify a NAND device */ +int nand_scan(struct mtd_info *mtd, int max_chips); +/* + * Separate phases of nand_scan(), allowing board driver to intervene + * and override command or ECC setup according to flash type. + */ +int nand_scan_ident(struct mtd_info *mtd, int max_chips, + struct nand_flash_dev *table); +int nand_scan_tail(struct mtd_info *mtd); + +/* Unregister the MTD device and free resources held by the NAND device */ +void nand_release(struct mtd_info *mtd); + +/* Internal helper for board drivers which need to override command function */ +void nand_wait_ready(struct mtd_info *mtd); + +/* locks all blocks present in the device */ +int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); + +/* unlocks specified locked blocks */ +int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); + +/* The maximum number of NAND chips in an array */ +#define NAND_MAX_CHIPS 8 + +/* + * Constants for hardware specific CLE/ALE/NCE function + * + * These are bits which can be or'ed to set/clear multiple + * bits in one go. + */ +/* Select the chip by setting nCE to low */ +#define NAND_NCE 0x01 +/* Select the command latch by setting CLE to high */ +#define NAND_CLE 0x02 +/* Select the address latch by setting ALE to high */ +#define NAND_ALE 0x04 + +#define NAND_CTRL_CLE (NAND_NCE | NAND_CLE) +#define NAND_CTRL_ALE (NAND_NCE | NAND_ALE) +#define NAND_CTRL_CHANGE 0x80 + +/* + * Standard NAND flash commands + */ +#define NAND_CMD_READ0 0 +#define NAND_CMD_READ1 1 +#define NAND_CMD_RNDOUT 5 +#define NAND_CMD_PAGEPROG 0x10 +#define NAND_CMD_READOOB 0x50 +#define NAND_CMD_ERASE1 0x60 +#define NAND_CMD_STATUS 0x70 +#define NAND_CMD_SEQIN 0x80 +#define NAND_CMD_RNDIN 0x85 +#define NAND_CMD_READID 0x90 +#define NAND_CMD_ERASE2 0xd0 +#define NAND_CMD_PARAM 0xec +#define NAND_CMD_GET_FEATURES 0xee +#define NAND_CMD_SET_FEATURES 0xef +#define NAND_CMD_RESET 0xff + +#define NAND_CMD_LOCK 0x2a +#define NAND_CMD_UNLOCK1 0x23 +#define NAND_CMD_UNLOCK2 0x24 + +/* Extended commands for large page devices */ +#define NAND_CMD_READSTART 0x30 +#define NAND_CMD_RNDOUTSTART 0xE0 +#define NAND_CMD_CACHEDPROG 0x15 + +#define NAND_CMD_NONE -1 + +/* Status bits */ +#define NAND_STATUS_FAIL 0x01 +#define NAND_STATUS_FAIL_N1 0x02 +#define NAND_STATUS_TRUE_READY 0x20 +#define NAND_STATUS_READY 0x40 +#define NAND_STATUS_WP 0x80 + +#define NAND_DATA_IFACE_CHECK_ONLY -1 + +/* + * Constants for ECC_MODES + */ +typedef enum { + NAND_ECC_NONE, + NAND_ECC_SOFT, + NAND_ECC_HW, + NAND_ECC_HW_SYNDROME, + NAND_ECC_HW_OOB_FIRST, + NAND_ECC_ON_DIE, +} nand_ecc_modes_t; + +enum nand_ecc_algo { + NAND_ECC_UNKNOWN, + NAND_ECC_HAMMING, + NAND_ECC_BCH, +}; + +/* + * Constants for Hardware ECC + */ +/* Reset Hardware ECC for read */ +#define NAND_ECC_READ 0 +/* Reset Hardware ECC for write */ +#define NAND_ECC_WRITE 1 +/* Enable Hardware ECC before syndrome is read back from flash */ +#define NAND_ECC_READSYN 2 + +/* + * Enable generic NAND 'page erased' check. This check is only done when + * ecc.correct() returns -EBADMSG. + * Set this flag if your implementation does not fix bitflips in erased + * pages and you want to rely on the default implementation. + */ +#define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) +#define NAND_ECC_MAXIMIZE BIT(1) +/* + * If your controller already sends the required NAND commands when + * reading or writing a page, then the framework is not supposed to + * send READ0 and SEQIN/PAGEPROG respectively. + */ +#define NAND_ECC_CUSTOM_PAGE_ACCESS BIT(2) + +/* Bit mask for flags passed to do_nand_read_ecc */ +#define NAND_GET_DEVICE 0x80 + + +/* + * Option constants for bizarre disfunctionality and real + * features. + */ +/* Buswidth is 16 bit */ +#define NAND_BUSWIDTH_16 0x00000002 +/* Chip has cache program function */ +#define NAND_CACHEPRG 0x00000008 +/* + * Chip requires ready check on read (for auto-incremented sequential read). + * True only for small page devices; large page devices do not support + * autoincrement. + */ +#define NAND_NEED_READRDY 0x00000100 + +/* Chip does not allow subpage writes */ +#define NAND_NO_SUBPAGE_WRITE 0x00000200 + +/* Device is one of 'new' xD cards that expose fake nand command set */ +#define NAND_BROKEN_XD 0x00000400 + +/* Device behaves just like nand, but is readonly */ +#define NAND_ROM 0x00000800 + +/* Device supports subpage reads */ +#define NAND_SUBPAGE_READ 0x00001000 + +/* + * Some MLC NANDs need data scrambling to limit bitflips caused by repeated + * patterns. + */ +#define NAND_NEED_SCRAMBLING 0x00002000 + +/* Options valid for Samsung large page devices */ +#define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG + +/* Macros to identify the above */ +#define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG)) +#define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ)) +#define NAND_HAS_SUBPAGE_WRITE(chip) !((chip)->options & NAND_NO_SUBPAGE_WRITE) + +/* Non chip related options */ +/* This option skips the bbt scan during initialization. */ +#define NAND_SKIP_BBTSCAN 0x00010000 +/* + * This option is defined if the board driver allocates its own buffers + * (e.g. because it needs them DMA-coherent). + */ +#define NAND_OWN_BUFFERS 0x00020000 +/* Chip may not exist, so silence any errors in scan */ +#define NAND_SCAN_SILENT_NODEV 0x00040000 +/* + * Autodetect nand buswidth with readid/onfi. + * This suppose the driver will configure the hardware in 8 bits mode + * when calling nand_scan_ident, and update its configuration + * before calling nand_scan_tail. + */ +#define NAND_BUSWIDTH_AUTO 0x00080000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00100000 + +/* + * In case your controller is implementing ->cmd_ctrl() and is relying on the + * default ->cmdfunc() implementation, you may want to let the core handle the + * tCCS delay which is required when a column change (RNDIN or RNDOUT) is + * requested. + * If your controller already takes care of this delay, you don't need to set + * this flag. + */ +#define NAND_WAIT_TCCS 0x00200000 + +/* Options set by nand scan */ +/* Nand scan has allocated controller struct */ +#define NAND_CONTROLLER_ALLOC 0x80000000 + +/* Cell info constants */ +#define NAND_CI_CHIPNR_MSK 0x03 +#define NAND_CI_CELLTYPE_MSK 0x0C +#define NAND_CI_CELLTYPE_SHIFT 2 + +/* Keep gcc happy */ +struct nand_chip; + +/* ONFI features */ +#define ONFI_FEATURE_16_BIT_BUS (1 << 0) +#define ONFI_FEATURE_EXT_PARAM_PAGE (1 << 7) + +/* ONFI timing mode, used in both asynchronous and synchronous mode */ +#define ONFI_TIMING_MODE_0 (1 << 0) +#define ONFI_TIMING_MODE_1 (1 << 1) +#define ONFI_TIMING_MODE_2 (1 << 2) +#define ONFI_TIMING_MODE_3 (1 << 3) +#define ONFI_TIMING_MODE_4 (1 << 4) +#define ONFI_TIMING_MODE_5 (1 << 5) +#define ONFI_TIMING_MODE_UNKNOWN (1 << 6) + +/* ONFI feature address */ +#define ONFI_FEATURE_ADDR_TIMING_MODE 0x1 + +/* Vendor-specific feature address (Micron) */ +#define ONFI_FEATURE_ADDR_READ_RETRY 0x89 +#define ONFI_FEATURE_ON_DIE_ECC 0x90 +#define ONFI_FEATURE_ON_DIE_ECC_EN BIT(3) + +/* ONFI subfeature parameters length */ +#define ONFI_SUBFEATURE_PARAM_LEN 4 + +/* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_SET_GET_FEATURES (1 << 2) + +struct nand_onfi_params { + /* rev info and features block */ + /* 'O' 'N' 'F' 'I' */ + u8 sig[4]; + __le16 revision; + __le16 features; + __le16 opt_cmd; + u8 reserved0[2]; + __le16 ext_param_page_length; /* since ONFI 2.1 */ + u8 num_of_param_pages; /* since ONFI 2.1 */ + u8 reserved1[17]; + + /* manufacturer information block */ + char manufacturer[12]; + char model[20]; + u8 jedec_id; + __le16 date_code; + u8 reserved2[13]; + + /* memory organization block */ + __le32 byte_per_page; + __le16 spare_bytes_per_page; + __le32 data_bytes_per_ppage; + __le16 spare_bytes_per_ppage; + __le32 pages_per_block; + __le32 blocks_per_lun; + u8 lun_count; + u8 addr_cycles; + u8 bits_per_cell; + __le16 bb_per_lun; + __le16 block_endurance; + u8 guaranteed_good_blocks; + __le16 guaranteed_block_endurance; + u8 programs_per_page; + u8 ppage_attr; + u8 ecc_bits; + u8 interleaved_bits; + u8 interleaved_ops; + u8 reserved3[13]; + + /* electrical parameter block */ + u8 io_pin_capacitance_max; + __le16 async_timing_mode; + __le16 program_cache_timing_mode; + __le16 t_prog; + __le16 t_bers; + __le16 t_r; + __le16 t_ccs; + __le16 src_sync_timing_mode; + u8 src_ssync_features; + __le16 clk_pin_capacitance_typ; + __le16 io_pin_capacitance_typ; + __le16 input_pin_capacitance_typ; + u8 input_pin_capacitance_max; + u8 driver_strength_support; + __le16 t_int_r; + __le16 t_adl; + u8 reserved4[8]; + + /* vendor */ + __le16 vendor_revision; + u8 vendor[88]; + + __le16 crc; +} __packed; + +#define ONFI_CRC_BASE 0x4F4E + +/* Extended ECC information Block Definition (since ONFI 2.1) */ +struct onfi_ext_ecc_info { + u8 ecc_bits; + u8 codeword_size; + __le16 bb_per_lun; + __le16 block_endurance; + u8 reserved[2]; +} __packed; + +#define ONFI_SECTION_TYPE_0 0 /* Unused section. */ +#define ONFI_SECTION_TYPE_1 1 /* for additional sections. */ +#define ONFI_SECTION_TYPE_2 2 /* for ECC information. */ +struct onfi_ext_section { + u8 type; + u8 length; +} __packed; + +#define ONFI_EXT_SECTION_MAX 8 + +/* Extended Parameter Page Definition (since ONFI 2.1) */ +struct onfi_ext_param_page { + __le16 crc; + u8 sig[4]; /* 'E' 'P' 'P' 'S' */ + u8 reserved0[10]; + struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX]; + + /* + * The actual size of the Extended Parameter Page is in + * @ext_param_page_length of nand_onfi_params{}. + * The following are the variable length sections. + * So we do not add any fields below. Please see the ONFI spec. + */ +} __packed; + +struct jedec_ecc_info { + u8 ecc_bits; + u8 codeword_size; + __le16 bb_per_lun; + __le16 block_endurance; + u8 reserved[2]; +} __packed; + +/* JEDEC features */ +#define JEDEC_FEATURE_16_BIT_BUS (1 << 0) + +struct nand_jedec_params { + /* rev info and features block */ + /* 'J' 'E' 'S' 'D' */ + u8 sig[4]; + __le16 revision; + __le16 features; + u8 opt_cmd[3]; + __le16 sec_cmd; + u8 num_of_param_pages; + u8 reserved0[18]; + + /* manufacturer information block */ + char manufacturer[12]; + char model[20]; + u8 jedec_id[6]; + u8 reserved1[10]; + + /* memory organization block */ + __le32 byte_per_page; + __le16 spare_bytes_per_page; + u8 reserved2[6]; + __le32 pages_per_block; + __le32 blocks_per_lun; + u8 lun_count; + u8 addr_cycles; + u8 bits_per_cell; + u8 programs_per_page; + u8 multi_plane_addr; + u8 multi_plane_op_attr; + u8 reserved3[38]; + + /* electrical parameter block */ + __le16 async_sdr_speed_grade; + __le16 toggle_ddr_speed_grade; + __le16 sync_ddr_speed_grade; + u8 async_sdr_features; + u8 toggle_ddr_features; + u8 sync_ddr_features; + __le16 t_prog; + __le16 t_bers; + __le16 t_r; + __le16 t_r_multi_plane; + __le16 t_ccs; + __le16 io_pin_capacitance_typ; + __le16 input_pin_capacitance_typ; + __le16 clk_pin_capacitance_typ; + u8 driver_strength_support; + __le16 t_adl; + u8 reserved4[36]; + + /* ECC and endurance block */ + u8 guaranteed_good_blocks; + __le16 guaranteed_block_endurance; + struct jedec_ecc_info ecc_info[4]; + u8 reserved5[29]; + + /* reserved */ + u8 reserved6[148]; + + /* vendor */ + __le16 vendor_rev_num; + u8 reserved7[88]; + + /* CRC for Parameter Page */ + __le16 crc; +} __packed; + +/** + * struct nand_id - NAND id structure + * @data: buffer containing the id bytes. Currently 8 bytes large, but can + * be extended if required. + * @len: ID length. + */ +struct nand_id { + u8 data[8]; + int len; +}; + +/** + * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices + * @lock: protection lock + * @active: the mtd device which holds the controller currently + * @wq: wait queue to sleep on if a NAND operation is in + * progress used instead of the per chip wait queue + * when a hw controller is available. + */ +struct nand_hw_control { + spinlock_t lock; + struct nand_chip *active; + wait_queue_head_t wq; +}; + +static inline void nand_hw_control_init(struct nand_hw_control *nfc) +{ + nfc->active = NULL; + spin_lock_init(&nfc->lock); + init_waitqueue_head(&nfc->wq); +} + +/** + * struct nand_ecc_step_info - ECC step information of ECC engine + * @stepsize: data bytes per ECC step + * @strengths: array of supported strengths + * @nstrengths: number of supported strengths + */ +struct nand_ecc_step_info { + int stepsize; + const int *strengths; + int nstrengths; +}; + +/** + * struct nand_ecc_caps - capability of ECC engine + * @stepinfos: array of ECC step information + * @nstepinfos: number of ECC step information + * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step + */ +struct nand_ecc_caps { + const struct nand_ecc_step_info *stepinfos; + int nstepinfos; + int (*calc_ecc_bytes)(int step_size, int strength); +}; + +/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */ +#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...) \ +static const int __name##_strengths[] = { __VA_ARGS__ }; \ +static const struct nand_ecc_step_info __name##_stepinfo = { \ + .stepsize = __step, \ + .strengths = __name##_strengths, \ + .nstrengths = ARRAY_SIZE(__name##_strengths), \ +}; \ +static const struct nand_ecc_caps __name = { \ + .stepinfos = &__name##_stepinfo, \ + .nstepinfos = 1, \ + .calc_ecc_bytes = __calc, \ +} + +/** + * struct nand_ecc_ctrl - Control structure for ECC + * @mode: ECC mode + * @algo: ECC algorithm + * @steps: number of ECC steps per page + * @size: data bytes per ECC step + * @bytes: ECC bytes per step + * @strength: max number of correctible bits per ECC step + * @total: total number of ECC bytes per page + * @prepad: padding information for syndrome based ECC generators + * @postpad: padding information for syndrome based ECC generators + * @options: ECC specific options (see NAND_ECC_XXX flags defined above) + * @priv: pointer to private ECC control data + * @hwctl: function to control hardware ECC generator. Must only + * be provided if an hardware ECC is available + * @calculate: function for ECC calculation or readback from ECC hardware + * @correct: function for ECC correction, matching to ECC generator (sw/hw). + * Should return a positive number representing the number of + * corrected bitflips, -EBADMSG if the number of bitflips exceed + * ECC strength, or any other error code if the error is not + * directly related to correction. + * If -EBADMSG is returned the input buffers should be left + * untouched. + * @read_page_raw: function to read a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and always return contiguous in-band and + * out-of-band data even if they're not stored + * contiguously on the NAND chip (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). + * @write_page_raw: function to write a raw page without ECC. This function + * should hide the specific layout used by the ECC + * controller and consider the passed data as contiguous + * in-band and out-of-band data. ECC controller is + * responsible for doing the appropriate transformations + * to adapt to its specific layout (e.g. + * NAND_ECC_HW_SYNDROME interleaves in-band and + * out-of-band data). + * @read_page: function to read a page according to the ECC generator + * requirements; returns maximum number of bitflips corrected in + * any single ECC step, -EIO hw error + * @read_subpage: function to read parts of the page covered by ECC; + * returns same as read_page() + * @write_subpage: function to write parts of the page covered by ECC. + * @write_page: function to write a page according to the ECC generator + * requirements. + * @write_oob_raw: function to write chip OOB data without ECC + * @read_oob_raw: function to read chip OOB data without ECC + * @read_oob: function to read chip OOB data + * @write_oob: function to write chip OOB data + */ +struct nand_ecc_ctrl { + nand_ecc_modes_t mode; + enum nand_ecc_algo algo; + int steps; + int size; + int bytes; + int total; + int strength; + int prepad; + int postpad; + unsigned int options; + void *priv; + void (*hwctl)(struct mtd_info *mtd, int mode); + int (*calculate)(struct mtd_info *mtd, const uint8_t *dat, + uint8_t *ecc_code); + int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc, + uint8_t *calc_ecc); + int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page); + int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required, int page); + int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page); + int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, + uint32_t offs, uint32_t len, uint8_t *buf, int page); + int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, + uint32_t offset, uint32_t data_len, + const uint8_t *data_buf, int oob_required, int page); + int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required, int page); + int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, + int page); + int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, + int page); + int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page); + int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip, + int page); +}; + +static inline int nand_standard_page_accessors(struct nand_ecc_ctrl *ecc) +{ + return !(ecc->options & NAND_ECC_CUSTOM_PAGE_ACCESS); +} + +/** + * struct nand_buffers - buffer structure for read/write + * @ecccalc: buffer pointer for calculated ECC, size is oobsize. + * @ecccode: buffer pointer for ECC read from flash, size is oobsize. + * @databuf: buffer pointer for data, size is (page size + oobsize). + * + * Do not change the order of buffers. databuf and oobrbuf must be in + * consecutive order. + */ +struct nand_buffers { + uint8_t *ecccalc; + uint8_t *ecccode; + uint8_t *databuf; +}; + +/** + * struct nand_sdr_timings - SDR NAND chip timings + * + * This struct defines the timing requirements of a SDR NAND chip. + * These information can be found in every NAND datasheets and the timings + * meaning are described in the ONFI specifications: + * www.onfi.org/~/media/ONFI/specs/onfi_3_1_spec.pdf (chapter 4.15 Timing + * Parameters) + * + * All these timings are expressed in picoseconds. + * + * @tBERS_max: Block erase time + * @tCCS_min: Change column setup time + * @tPROG_max: Page program time + * @tR_max: Page read time + * @tALH_min: ALE hold time + * @tADL_min: ALE to data loading time + * @tALS_min: ALE setup time + * @tAR_min: ALE to RE# delay + * @tCEA_max: CE# access time + * @tCEH_min: CE# high hold time + * @tCH_min: CE# hold time + * @tCHZ_max: CE# high to output hi-Z + * @tCLH_min: CLE hold time + * @tCLR_min: CLE to RE# delay + * @tCLS_min: CLE setup time + * @tCOH_min: CE# high to output hold + * @tCS_min: CE# setup time + * @tDH_min: Data hold time + * @tDS_min: Data setup time + * @tFEAT_max: Busy time for Set Features and Get Features + * @tIR_min: Output hi-Z to RE# low + * @tITC_max: Interface and Timing Mode Change time + * @tRC_min: RE# cycle time + * @tREA_max: RE# access time + * @tREH_min: RE# high hold time + * @tRHOH_min: RE# high to output hold + * @tRHW_min: RE# high to WE# low + * @tRHZ_max: RE# high to output hi-Z + * @tRLOH_min: RE# low to output hold + * @tRP_min: RE# pulse width + * @tRR_min: Ready to RE# low (data only) + * @tRST_max: Device reset time, measured from the falling edge of R/B# to the + * rising edge of R/B#. + * @tWB_max: WE# high to SR[6] low + * @tWC_min: WE# cycle time + * @tWH_min: WE# high hold time + * @tWHR_min: WE# high to RE# low + * @tWP_min: WE# pulse width + * @tWW_min: WP# transition to WE# low + */ +struct nand_sdr_timings { + u32 tBERS_max; + u32 tCCS_min; + u32 tPROG_max; + u32 tR_max; + u32 tALH_min; + u32 tADL_min; + u32 tALS_min; + u32 tAR_min; + u32 tCEA_max; + u32 tCEH_min; + u32 tCH_min; + u32 tCHZ_max; + u32 tCLH_min; + u32 tCLR_min; + u32 tCLS_min; + u32 tCOH_min; + u32 tCS_min; + u32 tDH_min; + u32 tDS_min; + u32 tFEAT_max; + u32 tIR_min; + u32 tITC_max; + u32 tRC_min; + u32 tREA_max; + u32 tREH_min; + u32 tRHOH_min; + u32 tRHW_min; + u32 tRHZ_max; + u32 tRLOH_min; + u32 tRP_min; + u32 tRR_min; + u64 tRST_max; + u32 tWB_max; + u32 tWC_min; + u32 tWH_min; + u32 tWHR_min; + u32 tWP_min; + u32 tWW_min; +}; + +/** + * enum nand_data_interface_type - NAND interface timing type + * @NAND_SDR_IFACE: Single Data Rate interface + */ +enum nand_data_interface_type { + NAND_SDR_IFACE, +}; + +/** + * struct nand_data_interface - NAND interface timing + * @type: type of the timing + * @timings: The timing, type according to @type + */ +struct nand_data_interface { + enum nand_data_interface_type type; + union { + struct nand_sdr_timings sdr; + } timings; +}; + +/** + * nand_get_sdr_timings - get SDR timing from data interface + * @conf: The data interface + */ +static inline const struct nand_sdr_timings * +nand_get_sdr_timings(const struct nand_data_interface *conf) +{ + if (conf->type != NAND_SDR_IFACE) + return ERR_PTR(-EINVAL); + + return &conf->timings.sdr; +} + +/** + * struct nand_manufacturer_ops - NAND Manufacturer operations + * @detect: detect the NAND memory organization and capabilities + * @init: initialize all vendor specific fields (like the ->read_retry() + * implementation) if any. + * @cleanup: the ->init() function may have allocated resources, ->cleanup() + * is here to let vendor specific code release those resources. + */ +struct nand_manufacturer_ops { + void (*detect)(struct nand_chip *chip); + int (*init)(struct nand_chip *chip); + void (*cleanup)(struct nand_chip *chip); +}; + +/** + * struct nand_chip - NAND Private Flash Chip Data + * @mtd: MTD device registered to the MTD framework + * @IO_ADDR_R: [BOARDSPECIFIC] address to read the 8 I/O lines of the + * flash device + * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the + * flash device. + * @read_byte: [REPLACEABLE] read one byte from the chip + * @read_word: [REPLACEABLE] read one word from the chip + * @write_byte: [REPLACEABLE] write a single byte to the chip on the + * low 8 I/O lines + * @write_buf: [REPLACEABLE] write data from the buffer to the chip + * @read_buf: [REPLACEABLE] read data from the chip into the buffer + * @select_chip: [REPLACEABLE] select chip nr + * @block_bad: [REPLACEABLE] check if a block is bad, using OOB markers + * @block_markbad: [REPLACEABLE] mark a block bad + * @cmd_ctrl: [BOARDSPECIFIC] hardwarespecific function for controlling + * ALE/CLE/nCE. Also used to write command and address + * @dev_ready: [BOARDSPECIFIC] hardwarespecific function for accessing + * device ready/busy line. If set to NULL no access to + * ready/busy is available and the ready/busy information + * is read from the chip status register. + * @cmdfunc: [REPLACEABLE] hardwarespecific function for writing + * commands to the chip. + * @waitfunc: [REPLACEABLE] hardwarespecific function for wait on + * ready. + * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for + * setting the read-retry mode. Mostly needed for MLC NAND. + * @ecc: [BOARDSPECIFIC] ECC control structure + * @buffers: buffer structure for read/write + * @buf_align: minimum buffer alignment required by a platform + * @hwcontrol: platform-specific hardware control structure + * @erase: [REPLACEABLE] erase function + * @scan_bbt: [REPLACEABLE] function to scan bad block table + * @chip_delay: [BOARDSPECIFIC] chip dependent delay for transferring + * data from array to read regs (tR). + * @state: [INTERN] the current state of the NAND device + * @oob_poi: "poison value buffer," used for laying out OOB data + * before writing + * @page_shift: [INTERN] number of address bits in a page (column + * address bits). + * @phys_erase_shift: [INTERN] number of address bits in a physical eraseblock + * @bbt_erase_shift: [INTERN] number of address bits in a bbt entry + * @chip_shift: [INTERN] number of address bits in one chip + * @options: [BOARDSPECIFIC] various chip options. They can partly + * be set to inform nand_scan about special functionality. + * See the defines for further explanation. + * @bbt_options: [INTERN] bad block specific options. All options used + * here must come from bbm.h. By default, these options + * will be copied to the appropriate nand_bbt_descr's. + * @badblockpos: [INTERN] position of the bad block marker in the oob + * area. + * @badblockbits: [INTERN] minimum number of set bits in a good block's + * bad block marker position; i.e., BBM == 11110111b is + * not bad when badblockbits == 7 + * @bits_per_cell: [INTERN] number of bits per cell. i.e., 1 means SLC. + * @ecc_strength_ds: [INTERN] ECC correctability from the datasheet. + * Minimum amount of bit errors per @ecc_step_ds guaranteed + * to be correctable. If unknown, set to zero. + * @ecc_step_ds: [INTERN] ECC step required by the @ecc_strength_ds, + * also from the datasheet. It is the recommended ECC step + * size, if known; if unknown, set to zero. + * @onfi_timing_mode_default: [INTERN] default ONFI timing mode. This field is + * set to the actually used ONFI mode if the chip is + * ONFI compliant or deduced from the datasheet if + * the NAND chip is not ONFI compliant. + * @numchips: [INTERN] number of physical chips + * @chipsize: [INTERN] the size of one chip for multichip arrays + * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 + * @pagebuf: [INTERN] holds the pagenumber which is currently in + * data_buf. + * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is + * currently in data_buf. + * @subpagesize: [INTERN] holds the subpagesize + * @id: [INTERN] holds NAND ID + * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), + * non 0 if ONFI supported. + * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), + * non 0 if JEDEC supported. + * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is + * supported, 0 otherwise. + * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is + * supported, 0 otherwise. + * @max_bb_per_die: [INTERN] the max number of bad blocks each die of a + * this nand device will encounter their life times. + * @blocks_per_die: [INTERN] The number of PEBs in a die + * @data_interface: [INTERN] NAND interface timing information + * @read_retries: [INTERN] the number of read retry modes supported + * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand + * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand + * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If + * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this + * means the configuration should not be applied but + * only checked. + * @bbt: [INTERN] bad block table pointer + * @bbt_td: [REPLACEABLE] bad block table descriptor for flash + * lookup. + * @bbt_md: [REPLACEABLE] bad block table mirror descriptor + * @badblock_pattern: [REPLACEABLE] bad block scan pattern used for initial + * bad block scan. + * @controller: [REPLACEABLE] a pointer to a hardware controller + * structure which is shared among multiple independent + * devices. + * @priv: [OPTIONAL] pointer to private chip data + * @manufacturer: [INTERN] Contains manufacturer information + */ + +struct nand_chip { + struct mtd_info mtd; + void __iomem *IO_ADDR_R; + void __iomem *IO_ADDR_W; + + uint8_t (*read_byte)(struct mtd_info *mtd); + u16 (*read_word)(struct mtd_info *mtd); + void (*write_byte)(struct mtd_info *mtd, uint8_t byte); + void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); + void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); + void (*select_chip)(struct mtd_info *mtd, int chip); + int (*block_bad)(struct mtd_info *mtd, loff_t ofs); + int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); + void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); + int (*dev_ready)(struct mtd_info *mtd); + void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, + int page_addr); + int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); + int (*erase)(struct mtd_info *mtd, int page); + int (*scan_bbt)(struct mtd_info *mtd); + int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, + int feature_addr, uint8_t *subfeature_para); + int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, + int feature_addr, uint8_t *subfeature_para); + int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); + int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, + const struct nand_data_interface *conf); + + + int chip_delay; + unsigned int options; + unsigned int bbt_options; + + int page_shift; + int phys_erase_shift; + int bbt_erase_shift; + int chip_shift; + int numchips; + uint64_t chipsize; + int pagemask; + int pagebuf; + unsigned int pagebuf_bitflips; + int subpagesize; + uint8_t bits_per_cell; + uint16_t ecc_strength_ds; + uint16_t ecc_step_ds; + int onfi_timing_mode_default; + int badblockpos; + int badblockbits; + + struct nand_id id; + int onfi_version; + int jedec_version; + union { + struct nand_onfi_params onfi_params; + struct nand_jedec_params jedec_params; + }; + u16 max_bb_per_die; + u32 blocks_per_die; + + struct nand_data_interface *data_interface; + + int read_retries; + + flstate_t state; + + uint8_t *oob_poi; + struct nand_hw_control *controller; + + struct nand_ecc_ctrl ecc; + struct nand_buffers *buffers; + unsigned long buf_align; + struct nand_hw_control hwcontrol; + + uint8_t *bbt; + struct nand_bbt_descr *bbt_td; + struct nand_bbt_descr *bbt_md; + + struct nand_bbt_descr *badblock_pattern; + + void *priv; + + struct { + const struct nand_manufacturer *desc; + void *priv; + } manufacturer; +}; + +extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; +extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; + +static inline void nand_set_flash_node(struct nand_chip *chip, + struct device_node *np) +{ + mtd_set_of_node(&chip->mtd, np); +} + +static inline struct device_node *nand_get_flash_node(struct nand_chip *chip) +{ + return mtd_get_of_node(&chip->mtd); +} + +static inline struct nand_chip *mtd_to_nand(struct mtd_info *mtd) +{ + return container_of(mtd, struct nand_chip, mtd); +} + +static inline struct mtd_info *nand_to_mtd(struct nand_chip *chip) +{ + return &chip->mtd; +} + +static inline void *nand_get_controller_data(struct nand_chip *chip) +{ + return chip->priv; +} + +static inline void nand_set_controller_data(struct nand_chip *chip, void *priv) +{ + chip->priv = priv; +} + +static inline void nand_set_manufacturer_data(struct nand_chip *chip, + void *priv) +{ + chip->manufacturer.priv = priv; +} + +static inline void *nand_get_manufacturer_data(struct nand_chip *chip) +{ + return chip->manufacturer.priv; +} + +/* + * NAND Flash Manufacturer ID Codes + */ +#define NAND_MFR_TOSHIBA 0x98 +#define NAND_MFR_ESMT 0xc8 +#define NAND_MFR_SAMSUNG 0xec +#define NAND_MFR_FUJITSU 0x04 +#define NAND_MFR_NATIONAL 0x8f +#define NAND_MFR_RENESAS 0x07 +#define NAND_MFR_STMICRO 0x20 +#define NAND_MFR_HYNIX 0xad +#define NAND_MFR_MICRON 0x2c +#define NAND_MFR_AMD 0x01 +#define NAND_MFR_MACRONIX 0xc2 +#define NAND_MFR_EON 0x92 +#define NAND_MFR_SANDISK 0x45 +#define NAND_MFR_INTEL 0x89 +#define NAND_MFR_ATO 0x9b +#define NAND_MFR_WINBOND 0xef + +/* The maximum expected count of bytes in the NAND ID sequence */ +#define NAND_MAX_ID_LEN 8 + +/* + * A helper for defining older NAND chips where the second ID byte fully + * defined the chip, including the geometry (chip size, eraseblock size, page + * size). All these chips have 512 bytes NAND page size. + */ +#define LEGACY_ID_NAND(nm, devid, chipsz, erasesz, opts) \ + { .name = (nm), {{ .dev_id = (devid) }}, .pagesize = 512, \ + .chipsize = (chipsz), .erasesize = (erasesz), .options = (opts) } + +/* + * A helper for defining newer chips which report their page size and + * eraseblock size via the extended ID bytes. + * + * The real difference between LEGACY_ID_NAND and EXTENDED_ID_NAND is that with + * EXTENDED_ID_NAND, manufacturers overloaded the same device ID so that the + * device ID now only represented a particular total chip size (and voltage, + * buswidth), and the page size, eraseblock size, and OOB size could vary while + * using the same device ID. + */ +#define EXTENDED_ID_NAND(nm, devid, chipsz, opts) \ + { .name = (nm), {{ .dev_id = (devid) }}, .chipsize = (chipsz), \ + .options = (opts) } + +#define NAND_ECC_INFO(_strength, _step) \ + { .strength_ds = (_strength), .step_ds = (_step) } +#define NAND_ECC_STRENGTH(type) ((type)->ecc.strength_ds) +#define NAND_ECC_STEP(type) ((type)->ecc.step_ds) + +/** + * struct nand_flash_dev - NAND Flash Device ID Structure + * @name: a human-readable name of the NAND chip + * @dev_id: the device ID (the second byte of the full chip ID array) + * @mfr_id: manufecturer ID part of the full chip ID array (refers the same + * memory address as @id[0]) + * @dev_id: device ID part of the full chip ID array (refers the same memory + * address as @id[1]) + * @id: full device ID array + * @pagesize: size of the NAND page in bytes; if 0, then the real page size (as + * well as the eraseblock size) is determined from the extended NAND + * chip ID array) + * @chipsize: total chip size in MiB + * @erasesize: eraseblock size in bytes (determined from the extended ID if 0) + * @options: stores various chip bit options + * @id_len: The valid length of the @id. + * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. + * @ecc.strength_ds: The ECC correctability from the datasheet, same as the + * @ecc_strength_ds in nand_chip{}. + * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the + * @ecc_step_ds in nand_chip{}, also from the datasheet. + * For example, the "4bit ECC for each 512Byte" can be set with + * NAND_ECC_INFO(4, 512). + * @onfi_timing_mode_default: the default ONFI timing mode entered after a NAND + * reset. Should be deduced from timings described + * in the datasheet. + * + */ +struct nand_flash_dev { + char *name; + union { + struct { + uint8_t mfr_id; + uint8_t dev_id; + }; + uint8_t id[NAND_MAX_ID_LEN]; + }; + unsigned int pagesize; + unsigned int chipsize; + unsigned int erasesize; + unsigned int options; + uint16_t id_len; + uint16_t oobsize; + struct { + uint16_t strength_ds; + uint16_t step_ds; + } ecc; + int onfi_timing_mode_default; +}; + +/** + * struct nand_manufacturer - NAND Flash Manufacturer structure + * @name: Manufacturer name + * @id: manufacturer ID code of device. + * @ops: manufacturer operations +*/ +struct nand_manufacturer { + int id; + char *name; + const struct nand_manufacturer_ops *ops; +}; + +const struct nand_manufacturer *nand_get_manufacturer(u8 id); + +static inline const char * +nand_manufacturer_name(const struct nand_manufacturer *manufacturer) +{ + return manufacturer ? manufacturer->name : "Unknown"; +} + +extern struct nand_flash_dev nand_flash_ids[]; + +extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; +extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; +extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; +extern const struct nand_manufacturer_ops micron_nand_manuf_ops; +extern const struct nand_manufacturer_ops amd_nand_manuf_ops; +extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; + +int nand_default_bbt(struct mtd_info *mtd); +int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); +int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); +int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt); +int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, + int allowbbt); +int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, uint8_t *buf); + +/** + * struct platform_nand_chip - chip level device structure + * @nr_chips: max. number of chips to scan for + * @chip_offset: chip number offset + * @nr_partitions: number of partitions pointed to by partitions (or zero) + * @partitions: mtd partition list + * @chip_delay: R/B delay value in us + * @options: Option flags, e.g. 16bit buswidth + * @bbt_options: BBT option flags, e.g. NAND_BBT_USE_FLASH + * @part_probe_types: NULL-terminated array of probe types + */ +struct platform_nand_chip { + int nr_chips; + int chip_offset; + int nr_partitions; + struct mtd_partition *partitions; + int chip_delay; + unsigned int options; + unsigned int bbt_options; + const char **part_probe_types; +}; + +/* Keep gcc happy */ +struct platform_device; + +/** + * struct platform_nand_ctrl - controller level device structure + * @probe: platform specific function to probe/setup hardware + * @remove: platform specific function to remove/teardown hardware + * @hwcontrol: platform specific hardware control structure + * @dev_ready: platform specific function to read ready/busy pin + * @select_chip: platform specific chip select function + * @cmd_ctrl: platform specific function for controlling + * ALE/CLE/nCE. Also used to write command and address + * @write_buf: platform specific function for write buffer + * @read_buf: platform specific function for read buffer + * @read_byte: platform specific function to read one byte from chip + * @priv: private data to transport driver specific settings + * + * All fields are optional and depend on the hardware driver requirements + */ +struct platform_nand_ctrl { + int (*probe)(struct platform_device *pdev); + void (*remove)(struct platform_device *pdev); + void (*hwcontrol)(struct mtd_info *mtd, int cmd); + int (*dev_ready)(struct mtd_info *mtd); + void (*select_chip)(struct mtd_info *mtd, int chip); + void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); + void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len); + void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len); + unsigned char (*read_byte)(struct mtd_info *mtd); + void *priv; +}; + +/** + * struct platform_nand_data - container structure for platform-specific data + * @chip: chip level chip structure + * @ctrl: controller level device structure + */ +struct platform_nand_data { + struct platform_nand_chip chip; + struct platform_nand_ctrl ctrl; +}; + +/* return the supported features. */ +static inline int onfi_feature(struct nand_chip *chip) +{ + return chip->onfi_version ? le16_to_cpu(chip->onfi_params.features) : 0; +} + +/* return the supported asynchronous timing mode. */ +static inline int onfi_get_async_timing_mode(struct nand_chip *chip) +{ + if (!chip->onfi_version) + return ONFI_TIMING_MODE_UNKNOWN; + return le16_to_cpu(chip->onfi_params.async_timing_mode); +} + +/* return the supported synchronous timing mode. */ +static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) +{ + if (!chip->onfi_version) + return ONFI_TIMING_MODE_UNKNOWN; + return le16_to_cpu(chip->onfi_params.src_sync_timing_mode); +} + +int onfi_init_data_interface(struct nand_chip *chip, + struct nand_data_interface *iface, + enum nand_data_interface_type type, + int timing_mode); + +/* + * Check if it is a SLC nand. + * The !nand_is_slc() can be used to check the MLC/TLC nand chips. + * We do not distinguish the MLC and TLC now. + */ +static inline bool nand_is_slc(struct nand_chip *chip) +{ + return chip->bits_per_cell == 1; +} + +/** + * Check if the opcode's address should be sent only on the lower 8 bits + * @command: opcode to check + */ +static inline int nand_opcode_8bits(unsigned int command) +{ + switch (command) { + case NAND_CMD_READID: + case NAND_CMD_PARAM: + case NAND_CMD_GET_FEATURES: + case NAND_CMD_SET_FEATURES: + return 1; + default: + break; + } + return 0; +} + +/* return the supported JEDEC features. */ +static inline int jedec_feature(struct nand_chip *chip) +{ + return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) + : 0; +} + +/* get timing characteristics from ONFI timing mode. */ +const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); +/* get data interface from ONFI timing mode 0, used after reset. */ +const struct nand_data_interface *nand_get_default_data_interface(void); + +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); + +int nand_check_ecc_caps(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_match_ecc_req(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_maximize_ecc(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +/* Default write_oob implementation */ +int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default write_oob syndrome implementation */ +int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); + +/* Default read_oob implementation */ +int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); + +/* Default read_oob syndrome implementation */ +int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, + int page); + +/* Stub used by drivers that do not support GET/SET FEATURES operations */ +int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd, + struct nand_chip *chip, int addr, + u8 *subfeature_param); + +/* Default read_page_raw implementation */ +int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page); + +/* Default write_page_raw implementation */ +int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required, int page); + +/* Reset and initialize a NAND device */ +int nand_reset(struct nand_chip *chip, int chipnr); + +/* Free resources held by the NAND device */ +void nand_cleanup(struct nand_chip *chip); + +/* Default extended ID decoding function */ +void nand_decode_ext_id(struct nand_chip *chip); +#endif /* __LINUX_MTD_RAWNAND_H */ diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h index 2251add65fa7..c759d403cbc0 100644 --- a/include/linux/mtd/sh_flctl.h +++ b/include/linux/mtd/sh_flctl.h @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index 65e91d0fa981..72a79c7d0e08 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -8,7 +8,7 @@ * published by the Free Software Foundation. */ -#include +#include #include #include diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h index 1cf555aef896..f1a2cf655bdb 100644 --- a/include/linux/platform_data/mtd-davinci.h +++ b/include/linux/platform_data/mtd-davinci.h @@ -28,7 +28,7 @@ #ifndef __ARCH_ARM_DAVINCI_NAND_H #define __ARCH_ARM_DAVINCI_NAND_H -#include +#include #define NANDFCR_OFFSET 0x60 #define NANDFSR_OFFSET 0x64 diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index f01659026b26..f8c553f92655 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -12,7 +12,7 @@ #ifndef __MTD_NAND_S3C2410_H #define __MTD_NAND_S3C2410_H -#include +#include /** * struct s3c2410_nand_set - define a set of one or more nand chips -- cgit From 0a1c7959acd9674a0e4e59f911f3e5fbf25fd693 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 17 May 2017 17:22:18 +0200 Subject: gpu: drm: tc35876x: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Patrik Jakobsson --- include/linux/i2c/tc35876x.h | 11 ----------- include/linux/platform_data/tc35876x.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) delete mode 100644 include/linux/i2c/tc35876x.h create mode 100644 include/linux/platform_data/tc35876x.h (limited to 'include') diff --git a/include/linux/i2c/tc35876x.h b/include/linux/i2c/tc35876x.h deleted file mode 100644 index cd6a51c71e7e..000000000000 --- a/include/linux/i2c/tc35876x.h +++ /dev/null @@ -1,11 +0,0 @@ - -#ifndef _TC35876X_H -#define _TC35876X_H - -struct tc35876x_platform_data { - int gpio_bridge_reset; - int gpio_panel_bl_en; - int gpio_panel_vadd; -}; - -#endif /* _TC35876X_H */ diff --git a/include/linux/platform_data/tc35876x.h b/include/linux/platform_data/tc35876x.h new file mode 100644 index 000000000000..cd6a51c71e7e --- /dev/null +++ b/include/linux/platform_data/tc35876x.h @@ -0,0 +1,11 @@ + +#ifndef _TC35876X_H +#define _TC35876X_H + +struct tc35876x_platform_data { + int gpio_bridge_reset; + int gpio_panel_bl_en; + int gpio_panel_vadd; +}; + +#endif /* _TC35876X_H */ -- cgit From 7c4974786f4794178f04e96318fc3b2f2850cbc6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 11 Aug 2017 19:41:17 +0800 Subject: net: export some generic xdp helpers This patch tries to export some generic xdp helpers to drivers. This can let driver to do XDP for a specific skb. This is useful for the case when the packet is hard to be processed at page level directly (e.g jumbo/GSO frame). With this patch, there's no need for driver to forbid the XDP set when configuration is not suitable. Instead, it can defer the XDP for packets that is hard to be processed directly after skb is created. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1d238d54c484..0f1c4cb2441e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3243,6 +3243,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); } +void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); -- cgit From 34fc75bfc616f1c1fbab56508c3f48f4b97c97ea Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Fri, 11 Aug 2017 16:24:15 +0200 Subject: uapi/linux/quota.h: Do not include linux/errno.h linux/errno.h is very sensitive to coordination with libc headers. Nothing in linux/quota.h needs it, so this change allows using this header in more contexts. Signed-off-by: Florian Weimer Signed-off-by: Jan Kara --- include/uapi/linux/quota.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h index 4d2489ef6f10..f17c9636a859 100644 --- a/include/uapi/linux/quota.h +++ b/include/uapi/linux/quota.h @@ -33,7 +33,6 @@ #ifndef _UAPI_LINUX_QUOTA_ #define _UAPI_LINUX_QUOTA_ -#include #include #define __DQUOT_VERSION__ "dquot_6.6.0" -- cgit From 2be03aedbb6d78929c9e3be0d1850830303744f7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Aug 2017 10:23:25 +0200 Subject: i2c: mux: pca954x: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Peter Rosin --- include/linux/i2c/pca954x.h | 48 ----------------------------------- include/linux/platform_data/pca954x.h | 48 +++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) delete mode 100644 include/linux/i2c/pca954x.h create mode 100644 include/linux/platform_data/pca954x.h (limited to 'include') diff --git a/include/linux/i2c/pca954x.h b/include/linux/i2c/pca954x.h deleted file mode 100644 index 1712677d5904..000000000000 --- a/include/linux/i2c/pca954x.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * - * pca954x.h - I2C multiplexer/switch support - * - * Copyright (c) 2008-2009 Rodolfo Giometti - * Copyright (c) 2008-2009 Eurotech S.p.A. - * Michael Lawnick - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -#ifndef _LINUX_I2C_PCA954X_H -#define _LINUX_I2C_PCA954X_H - -/* Platform data for the PCA954x I2C multiplexers */ - -/* Per channel initialisation data: - * @adap_id: bus number for the adapter. 0 = don't care - * @deselect_on_exit: set this entry to 1, if your H/W needs deselection - * of this channel after transaction. - * - */ -struct pca954x_platform_mode { - int adap_id; - unsigned int deselect_on_exit:1; - unsigned int class; -}; - -/* Per mux/switch data, used with i2c_register_board_info */ -struct pca954x_platform_data { - struct pca954x_platform_mode *modes; - int num_modes; -}; - -#endif /* _LINUX_I2C_PCA954X_H */ diff --git a/include/linux/platform_data/pca954x.h b/include/linux/platform_data/pca954x.h new file mode 100644 index 000000000000..1712677d5904 --- /dev/null +++ b/include/linux/platform_data/pca954x.h @@ -0,0 +1,48 @@ +/* + * + * pca954x.h - I2C multiplexer/switch support + * + * Copyright (c) 2008-2009 Rodolfo Giometti + * Copyright (c) 2008-2009 Eurotech S.p.A. + * Michael Lawnick + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#ifndef _LINUX_I2C_PCA954X_H +#define _LINUX_I2C_PCA954X_H + +/* Platform data for the PCA954x I2C multiplexers */ + +/* Per channel initialisation data: + * @adap_id: bus number for the adapter. 0 = don't care + * @deselect_on_exit: set this entry to 1, if your H/W needs deselection + * of this channel after transaction. + * + */ +struct pca954x_platform_mode { + int adap_id; + unsigned int deselect_on_exit:1; + unsigned int class; +}; + +/* Per mux/switch data, used with i2c_register_board_info */ +struct pca954x_platform_data { + struct pca954x_platform_mode *modes; + int num_modes; +}; + +#endif /* _LINUX_I2C_PCA954X_H */ -- cgit From 26b1083b4545ec08a7dcfe759323a2e142d60d75 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Aug 2017 10:23:26 +0200 Subject: i2c: mux: mlxcpld: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Signed-off-by: Peter Rosin --- include/linux/i2c/mlxcpld.h | 52 ------------------------------- include/linux/platform_data/x86/mlxcpld.h | 52 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 52 deletions(-) delete mode 100644 include/linux/i2c/mlxcpld.h create mode 100644 include/linux/platform_data/x86/mlxcpld.h (limited to 'include') diff --git a/include/linux/i2c/mlxcpld.h b/include/linux/i2c/mlxcpld.h deleted file mode 100644 index b08dcb183fca..000000000000 --- a/include/linux/i2c/mlxcpld.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * mlxcpld.h - Mellanox I2C multiplexer support in CPLD - * - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Michael Shych - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_I2C_MLXCPLD_H -#define _LINUX_I2C_MLXCPLD_H - -/* Platform data for the CPLD I2C multiplexers */ - -/* mlxcpld_mux_plat_data - per mux data, used with i2c_register_board_info - * @adap_ids - adapter array - * @num_adaps - number of adapters - * @sel_reg_addr - mux select register offset in CPLD space - */ -struct mlxcpld_mux_plat_data { - int *adap_ids; - int num_adaps; - int sel_reg_addr; -}; - -#endif /* _LINUX_I2C_MLXCPLD_H */ diff --git a/include/linux/platform_data/x86/mlxcpld.h b/include/linux/platform_data/x86/mlxcpld.h new file mode 100644 index 000000000000..b08dcb183fca --- /dev/null +++ b/include/linux/platform_data/x86/mlxcpld.h @@ -0,0 +1,52 @@ +/* + * mlxcpld.h - Mellanox I2C multiplexer support in CPLD + * + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Michael Shych + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _LINUX_I2C_MLXCPLD_H +#define _LINUX_I2C_MLXCPLD_H + +/* Platform data for the CPLD I2C multiplexers */ + +/* mlxcpld_mux_plat_data - per mux data, used with i2c_register_board_info + * @adap_ids - adapter array + * @num_adaps - number of adapters + * @sel_reg_addr - mux select register offset in CPLD space + */ +struct mlxcpld_mux_plat_data { + int *adap_ids; + int num_adaps; + int sel_reg_addr; +}; + +#endif /* _LINUX_I2C_MLXCPLD_H */ -- cgit From c4aee3e1b0de8e732fafd0d13e75ab7bdbc606ef Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sun, 21 May 2017 18:48:07 +0200 Subject: i2c: mux: pinctrl: remove platform_data No platform (at least no upstreamed platform) has ever used this platform_data. Just drop it and simplify the code. Reviewed-by: Stephen Warren Acked-by: Wolfram Sang Signed-off-by: Peter Rosin --- include/linux/i2c-mux-pinctrl.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 include/linux/i2c-mux-pinctrl.h (limited to 'include') diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h deleted file mode 100644 index a65c86429e84..000000000000 --- a/include/linux/i2c-mux-pinctrl.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * i2c-mux-pinctrl platform data - * - * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _LINUX_I2C_MUX_PINCTRL_H -#define _LINUX_I2C_MUX_PINCTRL_H - -/** - * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl - * @parent_bus_num: Parent I2C bus number - * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. - * @bus_count: Number of child busses. Also the number of elements in - * @pinctrl_states - * @pinctrl_states: The names of the pinctrl state to select for each child bus - * @pinctrl_state_idle: The pinctrl state to select when no child bus is being - * accessed. If NULL, the most recently used pinctrl state will be left - * selected. - */ -struct i2c_mux_pinctrl_platform_data { - int parent_bus_num; - int base_bus_num; - int bus_count; - const char **pinctrl_states; - const char *pinctrl_state_idle; -}; - -#endif -- cgit From 3f713b7c223ebe5094973ce6e0272bd97363b552 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 4 Aug 2017 11:22:31 +0900 Subject: pinctrl: move const qualifier before struct Update subsystem wide for consistency. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/machine.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index e5b1716f98cc..7fa5d87190c2 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -152,12 +152,12 @@ struct pinctrl_map { #ifdef CONFIG_PINCTRL -extern int pinctrl_register_mappings(struct pinctrl_map const *map, +extern int pinctrl_register_mappings(const struct pinctrl_map *map, unsigned num_maps); extern void pinctrl_provide_dummies(void); #else -static inline int pinctrl_register_mappings(struct pinctrl_map const *map, +static inline int pinctrl_register_mappings(const struct pinctrl_map *map, unsigned num_maps) { return 0; -- cgit From 950d55f5bf7991222e34428b5f779bd030b6a42a Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 24 Jul 2017 16:57:22 +0200 Subject: gpio: Cleanup kerneldoc Some kerneldoc has become stale or wasn't quite correct from the outset. Fix up the most serious issues to silence warnings when building the documentation. Signed-off-by: Thierry Reding Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index af20369ec8e7..ad4150d075c3 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -327,11 +327,10 @@ int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset, /** * struct gpio_pin_range - pin range controlled by a gpio chip - * @head: list for maintaining set of pin ranges, used internally + * @node: list for maintaining set of pin ranges, used internally * @pctldev: pinctrl device which handles corresponding pins * @range: actual range of pins controlled by a gpio controller */ - struct gpio_pin_range { struct list_head node; struct pinctrl_dev *pctldev; -- cgit From 67049c505017d7b380e16aa4f4b02344c2be0d55 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 24 Jul 2017 16:57:23 +0200 Subject: gpio: of: Improve kerneldoc Add descriptions for missing fields and fix up some parameter references to match the code. Signed-off-by: Thierry Reding Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ad4150d075c3..fe66c9306caf 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -180,8 +180,27 @@ struct gpio_chip { * If CONFIG_OF is enabled, then all GPIO controllers described in the * device tree automatically may have an OF translation */ + + /** + * @of_node: + * + * Pointer to a device tree node representing this GPIO controller. + */ struct device_node *of_node; + + /** + * @of_gpio_n_cells: + * + * Number of cells used to form the GPIO specifier. + */ int of_gpio_n_cells; + + /** + * @of_xlate: + * + * Callback to translate a device tree GPIO specifier into a chip- + * relative GPIO number and flags. + */ int (*of_xlate)(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, u32 *flags); #endif -- cgit From e3b445d77819b8fb41e3ceae9dbd49c8c8427c5d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 24 Jul 2017 16:57:28 +0200 Subject: gpio: Use unsigned int for of_gpio_n_cells The cell count for GPIO specifiers can never be negative, so make the field unsigned. Signed-off-by: Thierry Reding Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index fe66c9306caf..c97f8325e8bf 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -193,7 +193,7 @@ struct gpio_chip { * * Number of cells used to form the GPIO specifier. */ - int of_gpio_n_cells; + unsigned int of_gpio_n_cells; /** * @of_xlate: -- cgit From 4f04ff03dc48b81b436abf4f8600f9553825e39f Mon Sep 17 00:00:00 2001 From: Chen Zhong Date: Mon, 7 Aug 2017 15:24:36 +0800 Subject: soc: mediatek: add header files required for MT7622 SCPSYS dt-binding Add relevant header files required for dt-bindings of SCPSYS power domain control for all subsystems found on MT7622 SoC. Signed-off-by: Chen Zhong Signed-off-by: Sean Wang Reviewed-by: Ulf Hansson Signed-off-by: Matthias Brugger --- include/dt-bindings/power/mt7622-power.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/dt-bindings/power/mt7622-power.h (limited to 'include') diff --git a/include/dt-bindings/power/mt7622-power.h b/include/dt-bindings/power/mt7622-power.h new file mode 100644 index 000000000000..1b639269790c --- /dev/null +++ b/include/dt-bindings/power/mt7622-power.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 MediaTek Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See http://www.gnu.org/licenses/gpl-2.0.html for more details. + */ + +#ifndef _DT_BINDINGS_POWER_MT7622_POWER_H +#define _DT_BINDINGS_POWER_MT7622_POWER_H + +#define MT7622_POWER_DOMAIN_ETHSYS 0 +#define MT7622_POWER_DOMAIN_HIF0 1 +#define MT7622_POWER_DOMAIN_HIF1 2 +#define MT7622_POWER_DOMAIN_WB 3 + +#endif /* _DT_BINDINGS_POWER_MT7622_POWER_H */ -- cgit From 52510ee934885ec846faaa5d029329c9ba0e6ecc Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 7 Aug 2017 15:24:37 +0800 Subject: soc: mediatek: add SCPSYS power domain driver for MediaTek MT7622 SoC Add SCPSYS power domain driver for MT7622 SoC having four power domains which are respectively ETHSYS for Ethernet including embedded switch, WBSYS for WIFI and Bluetooth, HIF0SYS for PCI-E and SATA, and HIF1SYS for USB. Those functions could be selectively powered gated when the corresponding function is no longer to use in order to reach more minimal power dissipation. Signed-off-by: Chen Zhong Signed-off-by: Sean Wang Reviewed-by: Ulf Hansson Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index a5714e93fb34..a0182ec2a621 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -20,6 +20,13 @@ #define MT8173_TOP_AXI_PROT_EN_MFG_M1 BIT(22) #define MT8173_TOP_AXI_PROT_EN_MFG_SNOOP_OUT BIT(23) +#define MT7622_TOP_AXI_PROT_EN_ETHSYS (BIT(3) | BIT(17)) +#define MT7622_TOP_AXI_PROT_EN_HIF0 (BIT(24) | BIT(25)) +#define MT7622_TOP_AXI_PROT_EN_HIF1 (BIT(26) | BIT(27) | \ + BIT(28)) +#define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ + BIT(7) | BIT(8)) + int mtk_infracfg_set_bus_protection(struct regmap *infracfg, u32 mask); int mtk_infracfg_clear_bus_protection(struct regmap *infracfg, u32 mask); -- cgit From 561099a1a2e992a482a8318c0c9c5af26222e5cd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 8 Aug 2017 15:42:39 +0200 Subject: leds: pca955x: add GPIO support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCA955x family of chips are I2C LED blinkers whose pins not used to control LEDs can be used as general purpose I/Os (GPIOs). The following adds such a support by defining different operation modes for the pins (See bindings documentation for more details). The pca955x driver is then extended with a gpio_chip when some of pins are operating as GPIOs. The default operating mode is to behave as a LED. The GPIO support is conditioned by CONFIG_LEDS_PCA955X_GPIO. Signed-off-by: Cédric Le Goater Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/dt-bindings/leds/leds-pca955x.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/dt-bindings/leds/leds-pca955x.h (limited to 'include') diff --git a/include/dt-bindings/leds/leds-pca955x.h b/include/dt-bindings/leds/leds-pca955x.h new file mode 100644 index 000000000000..78cb7e979de7 --- /dev/null +++ b/include/dt-bindings/leds/leds-pca955x.h @@ -0,0 +1,16 @@ +/* + * This header provides constants for pca955x LED bindings. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef _DT_BINDINGS_LEDS_PCA955X_H +#define _DT_BINDINGS_LEDS_PCA955X_H + +#define PCA955X_TYPE_NONE 0 +#define PCA955X_TYPE_LED 1 +#define PCA955X_TYPE_GPIO 2 + +#endif /* _DT_BINDINGS_LEDS_PCA955X_H */ -- cgit From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:53 +0000 Subject: seccomp: Operation for checking if an action is available Userspace code that needs to check if the kernel supports a given action may not be able to use the /proc/sys/kernel/seccomp/actions_avail sysctl. The process may be running in a sandbox and, therefore, sufficient filesystem access may not be available. This patch adds an operation to the seccomp(2) syscall that allows userspace code to ask the kernel if a given action is available. If the action is supported by the kernel, 0 is returned. If the action is not supported by the kernel, -1 is returned with errno set to -EOPNOTSUPP. If this check is attempted on a kernel that doesn't support this new operation, -1 is returned with errno set to -EINVAL meaning that userspace code will have the ability to differentiate between the two error cases. Signed-off-by: Tyler Hicks Suggested-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..aaad61cc46bc 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -11,8 +11,9 @@ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ /* Valid operations for seccomp syscall. */ -#define SECCOMP_SET_MODE_STRICT 0 -#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 -- cgit From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:54 +0000 Subject: seccomp: Sysctl to configure actions that are allowed to be logged Adminstrators can write to this sysctl to set the seccomp actions that are allowed to be logged. Any actions not found in this sysctl will not be logged. For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were written to the sysctl. SECCOMP_RET_TRACE actions would not be logged since its string representation ("trace") wasn't present in the sysctl value. The path to the sysctl is: /proc/sys/kernel/seccomp/actions_logged The actions_avail sysctl can be read to discover the valid action names that can be written to the actions_logged sysctl with the exception of "allow". SECCOMP_RET_ALLOW actions cannot be configured for logging. The default setting for the sysctl is to allow all actions to be logged except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are currently logged, an upcoming patch will allow applications to request additional actions to be logged. There's one important exception to this sysctl. If a task is specifically being audited, meaning that an audit context has been allocated for the task, seccomp will log all actions other than SECCOMP_RET_ALLOW despite the value of actions_logged. This exception preserves the existing auditing behavior of tasks with an allocated audit context. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if audit_enabled && task-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- include/linux/audit.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2150bdccfbab..8c30f06d639d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -314,11 +314,7 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (!audit_enabled) - return; - - /* Force a record to be reported if a signal was delivered. */ - if (signr || unlikely(!audit_dummy_context())) + if (audit_enabled && unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } -- cgit From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:56 +0000 Subject: seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for all actions except for SECCOMP_RET_ALLOW for the given filter. SECCOMP_RET_KILL actions are always logged, when "kill" is in the actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged, regardless of this flag. This flag can be used to create noisy filters that result in all non-allowed actions to be logged. A process may have one noisy filter, which is loaded with this flag, as well as a quiet filter that's not loaded with this flag. This allows for the actions in a set of filters to be selectively conveyed to the admin. Since a system could have a large number of allocated seccomp_filter structs, struct packing was taken in consideration. On 64 bit x86, the new log member takes up one byte of an existing four byte hole in the struct. On 32 bit x86, the new log member creates a new four byte hole (unavoidable) and consumes one of those bytes. Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not capable of inspecting the audit log to verify that the actions taken in the filter were logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- include/linux/seccomp.h | 3 ++- include/uapi/linux/seccomp.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index ecc296c137cd..c8bef436b61d 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index aaad61cc46bc..19a611d0712e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -17,6 +17,7 @@ /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_LOG 2 /* * All BPF programs must return a 32-bit value. -- cgit From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:57 +0000 Subject: seccomp: Action to log before allowing Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing the syscall. At the implementation level, this action is identical to the existing SECCOMP_RET_ALLOW action. However, it can be very useful when initially developing a seccomp filter for an application. The developer can set the default action to be SECCOMP_RET_LOG, maybe mark any obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the application through its paces. A list of syscalls that triggered the default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and that list can be used to build the syscall whitelist. Finally, the developer can change the default action to the desired value. This provides a more friendly experience than seeing the application get killed, then updating the filter and rebuilding the app, seeing the application get killed due to a different syscall, then updating the filter and rebuilding the app, etc. The functionality is similar to what's supported by the various LSMs. SELinux has permissive mode, AppArmor has complain mode, SMACK has bring-up mode, etc. SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow while logging is slightly more restrictive than quietly allowing. Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of inspecting the audit log to verify that the syscall was logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if action == RET_LOG && RET_LOG in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 19a611d0712e..f94433263e4b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -31,6 +31,7 @@ #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ -- cgit From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 12:53:18 -0700 Subject: seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL to the more accurate SECCOMP_RET_KILL_THREAD. The existing selftest values are intentionally left as SECCOMP_RET_KILL just to be sure we're exercising the alias. Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index f94433263e4b..5a03f699eb17 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -27,7 +27,8 @@ * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -- cgit From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:01:39 -0700 Subject: seccomp: Introduce SECCOMP_RET_KILL_PROCESS This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill an entire process. This cannot yet be reached by seccomp, but it changes the default-kill behavior (for unknown return values) from kill-thread to kill-process. Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 5a03f699eb17..7e77c92df78a 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -22,18 +22,20 @@ /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. - * The upper 16-bits are ordered from least permissive values to most. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). * * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ -#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ #define SECCOMP_RET_ACTION 0x7fff0000U -- cgit From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:12:11 -0700 Subject: seccomp: Implement SECCOMP_RET_KILL_PROCESS action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the current thread. There have been a few requests for this to kill the entire process (the thread group). This cannot be just changed (discovered when adding coredump support since coredumping kills the entire process) because there are userspace programs depending on the thread-kill behavior. Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can be processed as "-1" by the kernel, below the existing RET_KILL that is ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only be visible when examining the siginfo in a core dump from a RET_KILL_*, where it will think it was thread-killed instead of process-killed. Attempts to introduce this behavior via other ways (filter flags, seccomp struct flags, masked RET_DATA bits) all come with weird side-effects and baggage. This change preserves the central behavioral expectations of the seccomp filter engine without putting too great a burden on changes needed in userspace to use the new action. The new action is discoverable by userspace through either the new actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp operation. If used without checking for availability, old kernels will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask will produce RET_KILL_THREAD). Cc: Paul Moore Cc: Fabricio Voznika Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 7e77c92df78a..f6bc1dea3247 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -38,6 +38,7 @@ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU -- cgit From a99b646afa8a02571ea298bedca6592d818229cd Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:23 +0800 Subject: PCI: Disable PCIe Relaxed Ordering if unsupported When bit4 is set in the PCIe Device Control register, it indicates whether the device is permitted to use relaxed ordering. On some platforms using relaxed ordering can have performance issues or due to erratum can cause data-corruption. In such cases devices must avoid using relaxed ordering. The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that Relaxed Ordering (RO) attribute should not be used for Transaction Layer Packets (TLP) targeted towards these affected root complexes. This patch checks if there is any node in the hierarchy that indicates that using relaxed ordering is not safe. In such cases the patch turns off the relaxed ordering by clearing the capability for this device. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Ashok Raj Acked-by: Alexander Duyck Acked-by: Casey Leedom Signed-off-by: David S. Miller --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..29606fb89464 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -188,6 +188,8 @@ enum pci_dev_flags { * the direct_complete optimization. */ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), + /* Don't use Relaxed Ordering for TLPs directed at this device */ + PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { @@ -1125,6 +1127,7 @@ bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); +bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); -- cgit From 42b7305905be52e467bbc346b0f2f95ad44eb1a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Aug 2017 21:31:38 +0200 Subject: udp: fix linear skb reception with PEEK_OFF copy_linear_skb() is broken; both of its callers actually expect 'len' to be the amount we are trying to copy, not the offset of the end. Fix it keeping the meanings of arguments in sync with what the callers (both of them) expect. Also restore a saner behavior on EFAULT (i.e. preserving the iov_iter position in case of failure): The commit fd851ba9caa9 ("udp: harden copy_linear_skb()") avoids the more destructive effect of the buggy copy_linear_skb(), e.g. no more invalid memory access, but said function still behaves incorrectly: when peeking with offset it can fail with EINVAL instead of copying the appropriate amount of memory. Reported-by: Sasha Levin Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Fixes: fd851ba9caa9 ("udp: harden copy_linear_skb()") Signed-off-by: Al Viro Acked-by: Paolo Abeni Tested-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index e9b1d1eacb59..586de4b811b5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -366,14 +366,13 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n, copy = len - off; + int n; - if (copy < 0) - return -EINVAL; - n = copy_to_iter(skb->data + off, copy, to); - if (n == copy) + n = copy_to_iter(skb->data + off, len, to); + if (n == len) return 0; + iov_iter_revert(to, n); return -EFAULT; } -- cgit From 0335a9554b4d6acc2d17efe752bff54ce59b57eb Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Aug 2017 18:34:22 +0200 Subject: mfd: dm355evm_msp: Move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Alexandre Belloni Acked-by: Dmitry Torokhov Signed-off-by: Lee Jones --- include/linux/i2c/dm355evm_msp.h | 79 ---------------------------------------- include/linux/mfd/dm355evm_msp.h | 79 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 79 deletions(-) delete mode 100644 include/linux/i2c/dm355evm_msp.h create mode 100644 include/linux/mfd/dm355evm_msp.h (limited to 'include') diff --git a/include/linux/i2c/dm355evm_msp.h b/include/linux/i2c/dm355evm_msp.h deleted file mode 100644 index 372470350fab..000000000000 --- a/include/linux/i2c/dm355evm_msp.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * dm355evm_msp.h - support MSP430 microcontroller on DM355EVM board - */ -#ifndef __LINUX_I2C_DM355EVM_MSP -#define __LINUX_I2C_DM355EVM_MSP - -/* - * Written against Spectrum's writeup for the A4 firmware revision, - * and tweaked to match source and rev D2 schematics by removing CPLD - * and NOR flash hooks (which were last appropriate in rev B boards). - * - * Note that the firmware supports a flavor of write posting ... to be - * sure a write completes, issue another read or write. - */ - -/* utilities to access "registers" emulated by msp430 firmware */ -extern int dm355evm_msp_write(u8 value, u8 reg); -extern int dm355evm_msp_read(u8 reg); - - -/* command/control registers */ -#define DM355EVM_MSP_COMMAND 0x00 -# define MSP_COMMAND_NULL 0 -# define MSP_COMMAND_RESET_COLD 1 -# define MSP_COMMAND_RESET_WARM 2 -# define MSP_COMMAND_RESET_WARM_I 3 -# define MSP_COMMAND_POWEROFF 4 -# define MSP_COMMAND_IR_REINIT 5 -#define DM355EVM_MSP_STATUS 0x01 -# define MSP_STATUS_BAD_OFFSET BIT(0) -# define MSP_STATUS_BAD_COMMAND BIT(1) -# define MSP_STATUS_POWER_ERROR BIT(2) -# define MSP_STATUS_RXBUF_OVERRUN BIT(3) -#define DM355EVM_MSP_RESET 0x02 /* 0 bits == in reset */ -# define MSP_RESET_DC5 BIT(0) -# define MSP_RESET_TVP5154 BIT(2) -# define MSP_RESET_IMAGER BIT(3) -# define MSP_RESET_ETHERNET BIT(4) -# define MSP_RESET_SYS BIT(5) -# define MSP_RESET_AIC33 BIT(7) - -/* GPIO registers ... bit patterns mostly match the source MSP ports */ -#define DM355EVM_MSP_LED 0x03 /* active low (MSP P4) */ -#define DM355EVM_MSP_SWITCH1 0x04 /* (MSP P5, masked) */ -# define MSP_SWITCH1_SW6_1 BIT(0) -# define MSP_SWITCH1_SW6_2 BIT(1) -# define MSP_SWITCH1_SW6_3 BIT(2) -# define MSP_SWITCH1_SW6_4 BIT(3) -# define MSP_SWITCH1_J1 BIT(4) /* NTSC/PAL */ -# define MSP_SWITCH1_MSP_INT BIT(5) /* active low */ -#define DM355EVM_MSP_SWITCH2 0x05 /* (MSP P6, masked) */ -# define MSP_SWITCH2_SW10 BIT(3) -# define MSP_SWITCH2_SW11 BIT(4) -# define MSP_SWITCH2_SW12 BIT(5) -# define MSP_SWITCH2_SW13 BIT(6) -# define MSP_SWITCH2_SW14 BIT(7) -#define DM355EVM_MSP_SDMMC 0x06 /* (MSP P2, masked) */ -# define MSP_SDMMC_0_WP BIT(1) -# define MSP_SDMMC_0_CD BIT(2) /* active low */ -# define MSP_SDMMC_1_WP BIT(3) -# define MSP_SDMMC_1_CD BIT(4) /* active low */ -#define DM355EVM_MSP_FIRMREV 0x07 /* not a GPIO (out of order) */ -#define DM355EVM_MSP_VIDEO_IN 0x08 /* (MSP P3, masked) */ -# define MSP_VIDEO_IMAGER BIT(7) /* low == tvp5146 */ - -/* power supply registers are currently omitted */ - -/* RTC registers */ -#define DM355EVM_MSP_RTC_0 0x12 /* LSB */ -#define DM355EVM_MSP_RTC_1 0x13 -#define DM355EVM_MSP_RTC_2 0x14 -#define DM355EVM_MSP_RTC_3 0x15 /* MSB */ - -/* input event queue registers; code == ((HIGH << 8) | LOW) */ -#define DM355EVM_MSP_INPUT_COUNT 0x16 /* decrement by reading LOW */ -#define DM355EVM_MSP_INPUT_HIGH 0x17 -#define DM355EVM_MSP_INPUT_LOW 0x18 - -#endif /* __LINUX_I2C_DM355EVM_MSP */ diff --git a/include/linux/mfd/dm355evm_msp.h b/include/linux/mfd/dm355evm_msp.h new file mode 100644 index 000000000000..372470350fab --- /dev/null +++ b/include/linux/mfd/dm355evm_msp.h @@ -0,0 +1,79 @@ +/* + * dm355evm_msp.h - support MSP430 microcontroller on DM355EVM board + */ +#ifndef __LINUX_I2C_DM355EVM_MSP +#define __LINUX_I2C_DM355EVM_MSP + +/* + * Written against Spectrum's writeup for the A4 firmware revision, + * and tweaked to match source and rev D2 schematics by removing CPLD + * and NOR flash hooks (which were last appropriate in rev B boards). + * + * Note that the firmware supports a flavor of write posting ... to be + * sure a write completes, issue another read or write. + */ + +/* utilities to access "registers" emulated by msp430 firmware */ +extern int dm355evm_msp_write(u8 value, u8 reg); +extern int dm355evm_msp_read(u8 reg); + + +/* command/control registers */ +#define DM355EVM_MSP_COMMAND 0x00 +# define MSP_COMMAND_NULL 0 +# define MSP_COMMAND_RESET_COLD 1 +# define MSP_COMMAND_RESET_WARM 2 +# define MSP_COMMAND_RESET_WARM_I 3 +# define MSP_COMMAND_POWEROFF 4 +# define MSP_COMMAND_IR_REINIT 5 +#define DM355EVM_MSP_STATUS 0x01 +# define MSP_STATUS_BAD_OFFSET BIT(0) +# define MSP_STATUS_BAD_COMMAND BIT(1) +# define MSP_STATUS_POWER_ERROR BIT(2) +# define MSP_STATUS_RXBUF_OVERRUN BIT(3) +#define DM355EVM_MSP_RESET 0x02 /* 0 bits == in reset */ +# define MSP_RESET_DC5 BIT(0) +# define MSP_RESET_TVP5154 BIT(2) +# define MSP_RESET_IMAGER BIT(3) +# define MSP_RESET_ETHERNET BIT(4) +# define MSP_RESET_SYS BIT(5) +# define MSP_RESET_AIC33 BIT(7) + +/* GPIO registers ... bit patterns mostly match the source MSP ports */ +#define DM355EVM_MSP_LED 0x03 /* active low (MSP P4) */ +#define DM355EVM_MSP_SWITCH1 0x04 /* (MSP P5, masked) */ +# define MSP_SWITCH1_SW6_1 BIT(0) +# define MSP_SWITCH1_SW6_2 BIT(1) +# define MSP_SWITCH1_SW6_3 BIT(2) +# define MSP_SWITCH1_SW6_4 BIT(3) +# define MSP_SWITCH1_J1 BIT(4) /* NTSC/PAL */ +# define MSP_SWITCH1_MSP_INT BIT(5) /* active low */ +#define DM355EVM_MSP_SWITCH2 0x05 /* (MSP P6, masked) */ +# define MSP_SWITCH2_SW10 BIT(3) +# define MSP_SWITCH2_SW11 BIT(4) +# define MSP_SWITCH2_SW12 BIT(5) +# define MSP_SWITCH2_SW13 BIT(6) +# define MSP_SWITCH2_SW14 BIT(7) +#define DM355EVM_MSP_SDMMC 0x06 /* (MSP P2, masked) */ +# define MSP_SDMMC_0_WP BIT(1) +# define MSP_SDMMC_0_CD BIT(2) /* active low */ +# define MSP_SDMMC_1_WP BIT(3) +# define MSP_SDMMC_1_CD BIT(4) /* active low */ +#define DM355EVM_MSP_FIRMREV 0x07 /* not a GPIO (out of order) */ +#define DM355EVM_MSP_VIDEO_IN 0x08 /* (MSP P3, masked) */ +# define MSP_VIDEO_IMAGER BIT(7) /* low == tvp5146 */ + +/* power supply registers are currently omitted */ + +/* RTC registers */ +#define DM355EVM_MSP_RTC_0 0x12 /* LSB */ +#define DM355EVM_MSP_RTC_1 0x13 +#define DM355EVM_MSP_RTC_2 0x14 +#define DM355EVM_MSP_RTC_3 0x15 /* MSB */ + +/* input event queue registers; code == ((HIGH << 8) | LOW) */ +#define DM355EVM_MSP_INPUT_COUNT 0x16 /* decrement by reading LOW */ +#define DM355EVM_MSP_INPUT_HIGH 0x17 +#define DM355EVM_MSP_INPUT_LOW 0x18 + +#endif /* __LINUX_I2C_DM355EVM_MSP */ -- cgit From 9787076c43a86f3465d46aa344cc3e2f607ae72f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Aug 2017 18:34:23 +0200 Subject: mfd: tps65010: Move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Greg Kroah-Hartman Acked-by: Krzysztof Kozlowski Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/i2c/tps65010.h | 205 ------------------------------------------- include/linux/mfd/tps65010.h | 205 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 205 deletions(-) delete mode 100644 include/linux/i2c/tps65010.h create mode 100644 include/linux/mfd/tps65010.h (limited to 'include') diff --git a/include/linux/i2c/tps65010.h b/include/linux/i2c/tps65010.h deleted file mode 100644 index 08aa92278d71..000000000000 --- a/include/linux/i2c/tps65010.h +++ /dev/null @@ -1,205 +0,0 @@ -/* linux/i2c/tps65010.h - * - * Functions to access TPS65010 power management device. - * - * Copyright (C) 2004 Dirk Behme - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __LINUX_I2C_TPS65010_H -#define __LINUX_I2C_TPS65010_H - -/* - * ---------------------------------------------------------------------------- - * Registers, all 8 bits - * ---------------------------------------------------------------------------- - */ - -#define TPS_CHGSTATUS 0x01 -# define TPS_CHG_USB (1 << 7) -# define TPS_CHG_AC (1 << 6) -# define TPS_CHG_THERM (1 << 5) -# define TPS_CHG_TERM (1 << 4) -# define TPS_CHG_TAPER_TMO (1 << 3) -# define TPS_CHG_CHG_TMO (1 << 2) -# define TPS_CHG_PRECHG_TMO (1 << 1) -# define TPS_CHG_TEMP_ERR (1 << 0) -#define TPS_REGSTATUS 0x02 -# define TPS_REG_ONOFF (1 << 7) -# define TPS_REG_COVER (1 << 6) -# define TPS_REG_UVLO (1 << 5) -# define TPS_REG_NO_CHG (1 << 4) /* tps65013 */ -# define TPS_REG_PG_LD02 (1 << 3) -# define TPS_REG_PG_LD01 (1 << 2) -# define TPS_REG_PG_MAIN (1 << 1) -# define TPS_REG_PG_CORE (1 << 0) -#define TPS_MASK1 0x03 -#define TPS_MASK2 0x04 -#define TPS_ACKINT1 0x05 -#define TPS_ACKINT2 0x06 -#define TPS_CHGCONFIG 0x07 -# define TPS_CHARGE_POR (1 << 7) /* 65010/65012 */ -# define TPS65013_AUA (1 << 7) /* 65011/65013 */ -# define TPS_CHARGE_RESET (1 << 6) -# define TPS_CHARGE_FAST (1 << 5) -# define TPS_CHARGE_CURRENT (3 << 3) -# define TPS_VBUS_500MA (1 << 2) -# define TPS_VBUS_CHARGING (1 << 1) -# define TPS_CHARGE_ENABLE (1 << 0) -#define TPS_LED1_ON 0x08 -#define TPS_LED1_PER 0x09 -#define TPS_LED2_ON 0x0a -#define TPS_LED2_PER 0x0b -#define TPS_VDCDC1 0x0c -# define TPS_ENABLE_LP (1 << 3) -#define TPS_VDCDC2 0x0d -# define TPS_LP_COREOFF (1 << 7) -# define TPS_VCORE_1_8V (7<<4) -# define TPS_VCORE_1_5V (6 << 4) -# define TPS_VCORE_1_4V (5 << 4) -# define TPS_VCORE_1_3V (4 << 4) -# define TPS_VCORE_1_2V (3 << 4) -# define TPS_VCORE_1_1V (2 << 4) -# define TPS_VCORE_1_0V (1 << 4) -# define TPS_VCORE_0_85V (0 << 4) -# define TPS_VCORE_LP_1_2V (3 << 2) -# define TPS_VCORE_LP_1_1V (2 << 2) -# define TPS_VCORE_LP_1_0V (1 << 2) -# define TPS_VCORE_LP_0_85V (0 << 2) -# define TPS_VIB (1 << 1) -# define TPS_VCORE_DISCH (1 << 0) -#define TPS_VREGS1 0x0e -# define TPS_LDO2_ENABLE (1 << 7) -# define TPS_LDO2_OFF (1 << 6) -# define TPS_VLDO2_3_0V (3 << 4) -# define TPS_VLDO2_2_75V (2 << 4) -# define TPS_VLDO2_2_5V (1 << 4) -# define TPS_VLDO2_1_8V (0 << 4) -# define TPS_LDO1_ENABLE (1 << 3) -# define TPS_LDO1_OFF (1 << 2) -# define TPS_VLDO1_3_0V (3 << 0) -# define TPS_VLDO1_2_75V (2 << 0) -# define TPS_VLDO1_2_5V (1 << 0) -# define TPS_VLDO1_ADJ (0 << 0) -#define TPS_MASK3 0x0f -#define TPS_DEFGPIO 0x10 - -/* - * ---------------------------------------------------------------------------- - * Macros used by exported functions - * ---------------------------------------------------------------------------- - */ - -#define LED1 1 -#define LED2 2 -#define OFF 0 -#define ON 1 -#define BLINK 2 -#define GPIO1 1 -#define GPIO2 2 -#define GPIO3 3 -#define GPIO4 4 -#define LOW 0 -#define HIGH 1 - -/* - * ---------------------------------------------------------------------------- - * Exported functions - * ---------------------------------------------------------------------------- - */ - -/* Draw from VBUS: - * 0 mA -- DON'T DRAW (might supply power instead) - * 100 mA -- usb unit load (slowest charge rate) - * 500 mA -- usb high power (fast battery charge) - */ -extern int tps65010_set_vbus_draw(unsigned mA); - -/* tps65010_set_gpio_out_value parameter: - * gpio: GPIO1, GPIO2, GPIO3 or GPIO4 - * value: LOW or HIGH - */ -extern int tps65010_set_gpio_out_value(unsigned gpio, unsigned value); - -/* tps65010_set_led parameter: - * led: LED1 or LED2 - * mode: ON, OFF or BLINK - */ -extern int tps65010_set_led(unsigned led, unsigned mode); - -/* tps65010_set_vib parameter: - * value: ON or OFF - */ -extern int tps65010_set_vib(unsigned value); - -/* tps65010_set_low_pwr parameter: - * mode: ON or OFF - */ -extern int tps65010_set_low_pwr(unsigned mode); - -/* tps65010_config_vregs1 parameter: - * value to be written to VREGS1 register - * Note: The complete register is written, set all bits you need - */ -extern int tps65010_config_vregs1(unsigned value); - -/* tps65013_set_low_pwr parameter: - * mode: ON or OFF - */ -extern int tps65013_set_low_pwr(unsigned mode); - -/* tps65010_set_vdcdc2 - * value to be written to VDCDC2 - */ -extern int tps65010_config_vdcdc2(unsigned value); - -struct i2c_client; - -/** - * struct tps65010_board - packages GPIO and LED lines - * @base: the GPIO number to assign to GPIO-1 - * @outmask: bit (N-1) is set to allow GPIO-N to be used as an - * (open drain) output - * @setup: optional callback issued once the GPIOs are valid - * @teardown: optional callback issued before the GPIOs are invalidated - * @context: optional parameter passed to setup() and teardown() - * - * Board data may be used to package the GPIO (and LED) lines for use - * in by the generic GPIO and LED frameworks. The first four GPIOs - * starting at gpio_base are GPIO1..GPIO4. The next two are LED1/nPG - * and LED2 (with hardware blinking capability, not currently exposed). - * - * The @setup callback may be used with the kind of board-specific glue - * which hands the (now-valid) GPIOs to other drivers, or which puts - * devices in their initial states using these GPIOs. - */ -struct tps65010_board { - int base; - unsigned outmask; - - int (*setup)(struct i2c_client *client, void *context); - int (*teardown)(struct i2c_client *client, void *context); - void *context; -}; - -#endif /* __LINUX_I2C_TPS65010_H */ - diff --git a/include/linux/mfd/tps65010.h b/include/linux/mfd/tps65010.h new file mode 100644 index 000000000000..a1fb9bc5311d --- /dev/null +++ b/include/linux/mfd/tps65010.h @@ -0,0 +1,205 @@ +/* linux/mfd/tps65010.h + * + * Functions to access TPS65010 power management device. + * + * Copyright (C) 2004 Dirk Behme + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LINUX_I2C_TPS65010_H +#define __LINUX_I2C_TPS65010_H + +/* + * ---------------------------------------------------------------------------- + * Registers, all 8 bits + * ---------------------------------------------------------------------------- + */ + +#define TPS_CHGSTATUS 0x01 +# define TPS_CHG_USB (1 << 7) +# define TPS_CHG_AC (1 << 6) +# define TPS_CHG_THERM (1 << 5) +# define TPS_CHG_TERM (1 << 4) +# define TPS_CHG_TAPER_TMO (1 << 3) +# define TPS_CHG_CHG_TMO (1 << 2) +# define TPS_CHG_PRECHG_TMO (1 << 1) +# define TPS_CHG_TEMP_ERR (1 << 0) +#define TPS_REGSTATUS 0x02 +# define TPS_REG_ONOFF (1 << 7) +# define TPS_REG_COVER (1 << 6) +# define TPS_REG_UVLO (1 << 5) +# define TPS_REG_NO_CHG (1 << 4) /* tps65013 */ +# define TPS_REG_PG_LD02 (1 << 3) +# define TPS_REG_PG_LD01 (1 << 2) +# define TPS_REG_PG_MAIN (1 << 1) +# define TPS_REG_PG_CORE (1 << 0) +#define TPS_MASK1 0x03 +#define TPS_MASK2 0x04 +#define TPS_ACKINT1 0x05 +#define TPS_ACKINT2 0x06 +#define TPS_CHGCONFIG 0x07 +# define TPS_CHARGE_POR (1 << 7) /* 65010/65012 */ +# define TPS65013_AUA (1 << 7) /* 65011/65013 */ +# define TPS_CHARGE_RESET (1 << 6) +# define TPS_CHARGE_FAST (1 << 5) +# define TPS_CHARGE_CURRENT (3 << 3) +# define TPS_VBUS_500MA (1 << 2) +# define TPS_VBUS_CHARGING (1 << 1) +# define TPS_CHARGE_ENABLE (1 << 0) +#define TPS_LED1_ON 0x08 +#define TPS_LED1_PER 0x09 +#define TPS_LED2_ON 0x0a +#define TPS_LED2_PER 0x0b +#define TPS_VDCDC1 0x0c +# define TPS_ENABLE_LP (1 << 3) +#define TPS_VDCDC2 0x0d +# define TPS_LP_COREOFF (1 << 7) +# define TPS_VCORE_1_8V (7<<4) +# define TPS_VCORE_1_5V (6 << 4) +# define TPS_VCORE_1_4V (5 << 4) +# define TPS_VCORE_1_3V (4 << 4) +# define TPS_VCORE_1_2V (3 << 4) +# define TPS_VCORE_1_1V (2 << 4) +# define TPS_VCORE_1_0V (1 << 4) +# define TPS_VCORE_0_85V (0 << 4) +# define TPS_VCORE_LP_1_2V (3 << 2) +# define TPS_VCORE_LP_1_1V (2 << 2) +# define TPS_VCORE_LP_1_0V (1 << 2) +# define TPS_VCORE_LP_0_85V (0 << 2) +# define TPS_VIB (1 << 1) +# define TPS_VCORE_DISCH (1 << 0) +#define TPS_VREGS1 0x0e +# define TPS_LDO2_ENABLE (1 << 7) +# define TPS_LDO2_OFF (1 << 6) +# define TPS_VLDO2_3_0V (3 << 4) +# define TPS_VLDO2_2_75V (2 << 4) +# define TPS_VLDO2_2_5V (1 << 4) +# define TPS_VLDO2_1_8V (0 << 4) +# define TPS_LDO1_ENABLE (1 << 3) +# define TPS_LDO1_OFF (1 << 2) +# define TPS_VLDO1_3_0V (3 << 0) +# define TPS_VLDO1_2_75V (2 << 0) +# define TPS_VLDO1_2_5V (1 << 0) +# define TPS_VLDO1_ADJ (0 << 0) +#define TPS_MASK3 0x0f +#define TPS_DEFGPIO 0x10 + +/* + * ---------------------------------------------------------------------------- + * Macros used by exported functions + * ---------------------------------------------------------------------------- + */ + +#define LED1 1 +#define LED2 2 +#define OFF 0 +#define ON 1 +#define BLINK 2 +#define GPIO1 1 +#define GPIO2 2 +#define GPIO3 3 +#define GPIO4 4 +#define LOW 0 +#define HIGH 1 + +/* + * ---------------------------------------------------------------------------- + * Exported functions + * ---------------------------------------------------------------------------- + */ + +/* Draw from VBUS: + * 0 mA -- DON'T DRAW (might supply power instead) + * 100 mA -- usb unit load (slowest charge rate) + * 500 mA -- usb high power (fast battery charge) + */ +extern int tps65010_set_vbus_draw(unsigned mA); + +/* tps65010_set_gpio_out_value parameter: + * gpio: GPIO1, GPIO2, GPIO3 or GPIO4 + * value: LOW or HIGH + */ +extern int tps65010_set_gpio_out_value(unsigned gpio, unsigned value); + +/* tps65010_set_led parameter: + * led: LED1 or LED2 + * mode: ON, OFF or BLINK + */ +extern int tps65010_set_led(unsigned led, unsigned mode); + +/* tps65010_set_vib parameter: + * value: ON or OFF + */ +extern int tps65010_set_vib(unsigned value); + +/* tps65010_set_low_pwr parameter: + * mode: ON or OFF + */ +extern int tps65010_set_low_pwr(unsigned mode); + +/* tps65010_config_vregs1 parameter: + * value to be written to VREGS1 register + * Note: The complete register is written, set all bits you need + */ +extern int tps65010_config_vregs1(unsigned value); + +/* tps65013_set_low_pwr parameter: + * mode: ON or OFF + */ +extern int tps65013_set_low_pwr(unsigned mode); + +/* tps65010_set_vdcdc2 + * value to be written to VDCDC2 + */ +extern int tps65010_config_vdcdc2(unsigned value); + +struct i2c_client; + +/** + * struct tps65010_board - packages GPIO and LED lines + * @base: the GPIO number to assign to GPIO-1 + * @outmask: bit (N-1) is set to allow GPIO-N to be used as an + * (open drain) output + * @setup: optional callback issued once the GPIOs are valid + * @teardown: optional callback issued before the GPIOs are invalidated + * @context: optional parameter passed to setup() and teardown() + * + * Board data may be used to package the GPIO (and LED) lines for use + * in by the generic GPIO and LED frameworks. The first four GPIOs + * starting at gpio_base are GPIO1..GPIO4. The next two are LED1/nPG + * and LED2 (with hardware blinking capability, not currently exposed). + * + * The @setup callback may be used with the kind of board-specific glue + * which hands the (now-valid) GPIOs to other drivers, or which puts + * devices in their initial states using these GPIOs. + */ +struct tps65010_board { + int base; + unsigned outmask; + + int (*setup)(struct i2c_client *client, void *context); + int (*teardown)(struct i2c_client *client, void *context); + void *context; +}; + +#endif /* __LINUX_I2C_TPS65010_H */ + -- cgit From d7b5f5cc5eb438270ba6c86207cc74fb492e1a57 Mon Sep 17 00:00:00 2001 From: Fenglin Wu Date: Tue, 15 Aug 2017 08:38:37 +0800 Subject: pinctrl: qcom: spmi-gpio: Add support for GPIO LV/MV subtype GPIO LV (low voltage)/MV (medium voltage) subtypes have different features and register mappings than 4CH/8CH subtypes. Add support for LV and MV subtypes. Signed-off-by: Fenglin Wu Acked-by: Rob Herring Acked-by: Bjorn Andersson Signed-off-by: Linus Walleij --- include/dt-bindings/pinctrl/qcom,pmic-gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/pinctrl/qcom,pmic-gpio.h b/include/dt-bindings/pinctrl/qcom,pmic-gpio.h index d33f17c8a515..b8ff8824e21b 100644 --- a/include/dt-bindings/pinctrl/qcom,pmic-gpio.h +++ b/include/dt-bindings/pinctrl/qcom,pmic-gpio.h @@ -98,6 +98,8 @@ #define PMIC_GPIO_FUNC_PAIRED "paired" #define PMIC_GPIO_FUNC_FUNC1 "func1" #define PMIC_GPIO_FUNC_FUNC2 "func2" +#define PMIC_GPIO_FUNC_FUNC3 "func3" +#define PMIC_GPIO_FUNC_FUNC4 "func4" #define PMIC_GPIO_FUNC_DTEST1 "dtest1" #define PMIC_GPIO_FUNC_DTEST2 "dtest2" #define PMIC_GPIO_FUNC_DTEST3 "dtest3" -- cgit From b3c4ec71ec413c2e5bfb028bdf1737af07f1fde0 Mon Sep 17 00:00:00 2001 From: Abdulhadi Mohamed Date: Wed, 19 Jul 2017 16:31:10 +0100 Subject: usb: gadget: f_hid: {GET,SET} PROTOCOL Support The current f_hid driver doesn't handle GET_PROCOTOL and SET_PROCOTOL requests, which are required to operate HID gadgets in BOOT mode. This patch implements this feature for devices that have the same implementation for REPORT and BOOT mode so that these devices are recognized by older BIOSes. Signed-off-by: Abdulhadi Mohamed Signed-off-by: Felipe Balbi --- include/linux/hid.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5006f9b5d837..6519cdc4c7d3 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -362,6 +362,12 @@ struct hid_item { #define HID_GROUP_WACOM 0x0101 #define HID_GROUP_LOGITECH_DJ_DEVICE 0x0102 +/* + * HID protocol status + */ +#define HID_REPORT_PROTOCOL 1 +#define HID_BOOT_PROTOCOL 0 + /* * This is the global environment of the parser. This information is * persistent for main-items. The global environment can be saved and -- cgit From 31fe084ffaaf8abece14f8ca28e5e3b4e2bf97b6 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Tue, 1 Aug 2017 02:00:56 -0700 Subject: usb: gadget: core: unmap request from DMA only if previously mapped In the SG case this is already handled since a non-zero request->num_mapped_sgs is a clear indicator that dma_map_sg() had been called. While it would be nice to do the same for the singly mapped case by simply checking for non-zero request->dma, it's conceivable that 0 is a valid dma_addr_t handle. Hence add a flag 'dma_mapped' to struct usb_request and use this to determine the need to call dma_unmap_single(). Otherwise, if a request is not DMA mapped then the result of calling usb_request_unmap_request() would safely be a no-op. Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 1a4a4bacfae6..21468a722c4a 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -48,6 +48,7 @@ struct usb_ep; * by adding a zero length packet as needed; * @short_not_ok: When reading data, makes short packets be * treated as errors (queue stops advancing till cleanup). + * @dma_mapped: Indicates if request has been mapped to DMA (internal) * @complete: Function called when request completes, so this request and * its buffer may be re-used. The function will always be called with * interrupts disabled, and it must not sleep. @@ -103,6 +104,7 @@ struct usb_request { unsigned no_interrupt:1; unsigned zero:1; unsigned short_not_ok:1; + unsigned dma_mapped:1; void (*complete)(struct usb_ep *ep, struct usb_request *req); -- cgit From a551e27368dea202cbef3e8861c21d965427cfe6 Mon Sep 17 00:00:00 2001 From: Chenglin Xu Date: Tue, 15 Aug 2017 17:09:15 +0800 Subject: regulator: mt6380: Add support for MT6380 The MT6380 is a regulator found those boards with MediaTek MT7622 SoC It is connected as a slave to the SoC using MediaTek PMIC wrapper which is the common interface connecting with Mediatek made various PMICs. Signed-off-by: Chenglin Xu Signed-off-by: Sean Wang Signed-off-by: Mark Brown --- include/linux/regulator/mt6380-regulator.h | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/regulator/mt6380-regulator.h (limited to 'include') diff --git a/include/linux/regulator/mt6380-regulator.h b/include/linux/regulator/mt6380-regulator.h new file mode 100644 index 000000000000..465182da6315 --- /dev/null +++ b/include/linux/regulator/mt6380-regulator.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 MediaTek Inc. + * Author: Chenglin Xu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_REGULATOR_mt6380_H +#define __LINUX_REGULATOR_mt6380_H + +enum { + MT6380_ID_VCPU = 0, + MT6380_ID_VCORE, + MT6380_ID_VRF, + MT6380_ID_VMLDO, + MT6380_ID_VALDO, + MT6380_ID_VPHYLDO, + MT6380_ID_VDDRLDO, + MT6380_ID_VTLDO, + MT6380_ID_RG_MAX, +}; + +#define MT6380_MAX_REGULATOR MT6380_ID_RG_MAX + +#endif /* __LINUX_REGULATOR_mt6380_H */ -- cgit From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Aug 2017 17:19:26 +0200 Subject: iommu: Fix wrong freeing of iommu_device->dev The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the relase function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver. Reported-by: Sebastian Ott Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..176f7569d874 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -240,7 +240,7 @@ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; - struct device dev; + struct device *dev; }; int iommu_device_register(struct iommu_device *iommu); @@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, iommu->fwnode = fwnode; } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return (struct iommu_device *)dev_get_drvdata(dev); +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, { } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return NULL; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit From 129f6c4820dd68be26b516ba6d6e471e63855d47 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jul 2017 22:26:25 +0200 Subject: mtd: only use __xipram annotation when XIP_KERNEL is set When XIP_KERNEL is enabled, some functions are defined in the .data ELF section because we require them to be in RAM whenever we communicate with the flash chip. However this causes problems when FTRACE is enabled and gcc emits calls to __gnu_mcount_nc in the function prolog: drivers/built-in.o: In function `cfi_chip_setup': :(.data+0x272fc): relocation truncated to fit: R_ARM_CALL against symbol `__gnu_mcount_nc' defined in .text section in arch/arm/kernel/built-in.o drivers/built-in.o: In function `cfi_probe_chip': :(.data+0x27de8): relocation truncated to fit: R_ARM_CALL against symbol `__gnu_mcount_nc' defined in .text section in arch/arm/kernel/built-in.o /tmp/ccY172rP.s: Assembler messages: /tmp/ccY172rP.s:70: Warning: ignoring changed section attributes for .data /tmp/ccY172rP.s: Error: 1 warning, treating warnings as errors make[5]: *** [drivers/mtd/chips/cfi_probe.o] Error 1 /tmp/ccK4rjeO.s: Assembler messages: /tmp/ccK4rjeO.s:421: Warning: ignoring changed section attributes for .data /tmp/ccK4rjeO.s: Error: 1 warning, treating warnings as errors make[5]: *** [drivers/mtd/chips/cfi_util.o] Error 1 /tmp/ccUvhCYR.s: Assembler messages: /tmp/ccUvhCYR.s:1895: Warning: ignoring changed section attributes for .data /tmp/ccUvhCYR.s: Error: 1 warning, treating warnings as errors Specifically, this does not work because the .data section is not marked executable, which leads LD to not generate trampolines for long calls. This moves the __xipram functions into their own .xiptext section instead. The section is still placed next to .data and located in RAM but is marked executable, which avoids the build errors. Also, we only need to place the XIP functions into a separate section if both CONFIG_XIP_KERNEL and CONFIG_MTD_XIP are set: When only MTD_XIP is used, the whole kernel is still in RAM and we do not need to worry about pulling out the rug under it. When only XIP_KERNEL but not MTD_XIP is set, the kernel is in some form of ROM, but we never write to it. Note that MTD_XIP has been broken on ARM since around 2011 or 2012. I have sent another patch[2] to fix compilation, which I plan to merge through arm-soc unless there are objections. The obvious alternative to that would be to completely rip out the MTD_XIP support from the kernel, since obviously nobody has been using it in a long while. Link: [1] https://patchwork.kernel.org/patch/8109771/ Link: [2] https://patchwork.kernel.org/patch/9855225/ Signed-off-by: Arnd Bergmann Signed-off-by: Boris Brezillon --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/mtd/xip.h | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index da0be9a8d1de..ff507c178327 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -203,6 +203,7 @@ * pull in .data..stuff which has its own requirements. Same for bss. */ #define DATA_DATA \ + *(.xiptext) \ *(.data .data.[0-9a-zA-Z_]*) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ diff --git a/include/linux/mtd/xip.h b/include/linux/mtd/xip.h index abed4dec5c2f..e373690cce0a 100644 --- a/include/linux/mtd/xip.h +++ b/include/linux/mtd/xip.h @@ -30,7 +30,9 @@ * obviously not be running from flash. The __xipram is therefore marking * those functions so they get relocated to ram. */ -#define __xipram noinline __attribute__ ((__section__ (".data"))) +#ifdef CONFIG_XIP_KERNEL +#define __xipram noinline __attribute__ ((__section__ (".xiptext"))) +#endif /* * Each architecture has to provide the following macros. They must access @@ -90,10 +92,10 @@ #define xip_cpu_idle() do { } while (0) #endif -#else +#endif /* CONFIG_MTD_XIP */ +#ifndef __xipram #define __xipram - -#endif /* CONFIG_MTD_XIP */ +#endif #endif /* __LINUX_MTD_XIP_H__ */ -- cgit From 44dd8a989c787e9077745417140aa132bfe45bf5 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 15 Aug 2017 19:07:53 +0800 Subject: include: uapi: usb: Introduce USB charger type and state definition Introducing USB charger type and state definition can help to support USB charging which will be added in USB phy core. Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/charger.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/uapi/linux/usb/charger.h (limited to 'include') diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h new file mode 100644 index 000000000000..5f72af35b3ed --- /dev/null +++ b/include/uapi/linux/usb/charger.h @@ -0,0 +1,31 @@ +/* + * This file defines the USB charger type and state that are needed for + * USB device APIs. + */ + +#ifndef _UAPI__LINUX_USB_CHARGER_H +#define _UAPI__LINUX_USB_CHARGER_H + +/* + * USB charger type: + * SDP (Standard Downstream Port) + * DCP (Dedicated Charging Port) + * CDP (Charging Downstream Port) + * ACA (Accessory Charger Adapters) + */ +enum usb_charger_type { + UNKNOWN_TYPE, + SDP_TYPE, + DCP_TYPE, + CDP_TYPE, + ACA_TYPE, +}; + +/* USB charger state */ +enum usb_charger_state { + USB_CHARGER_DEFAULT, + USB_CHARGER_PRESENT, + USB_CHARGER_ABSENT, +}; + +#endif /* _UAPI__LINUX_USB_CHARGER_H */ -- cgit From a9081a008f84819ab2f3da596bf89afa16beea94 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 15 Aug 2017 19:07:54 +0800 Subject: usb: phy: Add USB charger support This patch introduces the usb charger support based on usb phy that makes an enhancement to a power driver. The basic conception of the usb charger is that, when one usb charger is added or removed by reporting from the extcon device state change, the usb charger will report to power user to set the current limitation. Power user can register a notifiee on the usb phy by issuing usb_register_notifier() to get notified by charger status changes or charger current changes. we can notify what current to be drawn to power user according to different charger type, and now we have 2 methods to get charger type. One is get charger type from extcon subsystem, which also means the charger state changes. Another is we can get the charger type from USB controller detecting or PMIC detecting, and the charger state changes should be told by issuing usb_phy_set_charger_state(). Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index 299245105610..de881b171ba9 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -12,6 +12,7 @@ #include #include #include +#include enum usb_phy_interface { USBPHY_INTERFACE_MODE_UNKNOWN, @@ -72,6 +73,17 @@ struct usb_phy_io_ops { int (*write)(struct usb_phy *x, u32 val, u32 reg); }; +struct usb_charger_current { + unsigned int sdp_min; + unsigned int sdp_max; + unsigned int dcp_min; + unsigned int dcp_max; + unsigned int cdp_min; + unsigned int cdp_max; + unsigned int aca_min; + unsigned int aca_max; +}; + struct usb_phy { struct device *dev; const char *label; @@ -91,6 +103,13 @@ struct usb_phy { struct extcon_dev *id_edev; struct notifier_block vbus_nb; struct notifier_block id_nb; + struct notifier_block type_nb; + + /* Support USB charger */ + enum usb_charger_type chg_type; + enum usb_charger_state chg_state; + struct usb_charger_current chg_cur; + struct work_struct chg_work; /* for notification of usb_phy_events */ struct atomic_notifier_head notifier; @@ -129,6 +148,12 @@ struct usb_phy { enum usb_device_speed speed); int (*notify_disconnect)(struct usb_phy *x, enum usb_device_speed speed); + + /* + * Charger detection method can be implemented if you need to + * manually detect the charger type. + */ + enum usb_charger_type (*charger_detect)(struct usb_phy *x); }; /** @@ -219,6 +244,12 @@ extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern int usb_bind_phy(const char *dev_name, u8 index, const char *phy_dev_name); extern void usb_phy_set_event(struct usb_phy *x, unsigned long event); +extern void usb_phy_set_charger_current(struct usb_phy *usb_phy, + unsigned int mA); +extern void usb_phy_get_charger_current(struct usb_phy *usb_phy, + unsigned int *min, unsigned int *max); +extern void usb_phy_set_charger_state(struct usb_phy *usb_phy, + enum usb_charger_state state); #else static inline struct usb_phy *usb_get_phy(enum usb_phy_type type) { @@ -270,11 +301,29 @@ static inline int usb_bind_phy(const char *dev_name, u8 index, static inline void usb_phy_set_event(struct usb_phy *x, unsigned long event) { } + +static inline void usb_phy_set_charger_current(struct usb_phy *usb_phy, + unsigned int mA) +{ +} + +static inline void usb_phy_get_charger_current(struct usb_phy *usb_phy, + unsigned int *min, + unsigned int *max) +{ +} + +static inline void usb_phy_set_charger_state(struct usb_phy *usb_phy, + enum usb_charger_state state) +{ +} #endif static inline int usb_phy_set_power(struct usb_phy *x, unsigned mA) { + usb_phy_set_charger_current(x, mA); + if (x && x->set_power) return x->set_power(x, mA); return 0; -- cgit From e24a1307ba1f99fc62a0bd61d5e87fcfb6d5503d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 28 Jul 2017 10:31:25 +0530 Subject: mm/hugetlb: Allow arch to override and call the weak function When running in guest mode ppc64 supports a different mechanism for hugetlb allocation/reservation. The LPAR management application called HMC can be used to reserve a set of hugepages and we pass the details of reserved pages via device tree to the guest. (more details in htab_dt_scan_hugepage_blocks()) . We do the memblock_reserve of the range and later in the boot sequence, we add the reserved range to huge_boot_pages. But to enable 16G hugetlb on baremetal config (when we are not running as guest) we want to do memblock reservation during boot. Generic code already does this Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0ed8e41aaf11..8bbbd37ab105 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -358,6 +358,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); /* arch callback */ +int __init __alloc_bootmem_huge_page(struct hstate *h); int __init alloc_bootmem_huge_page(struct hstate *h); void __init hugetlb_bad_size(void); -- cgit From cf6e7bac6357f0ccca51fcb5eb325e724f6b4c95 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 15 Aug 2017 15:57:33 +0100 Subject: drm/i915: Add support for drm syncobjs This commit adds support for waiting on or signaling DRM syncobjs as part of execbuf. It does so by hijacking the currently unused cliprects pointer to instead point to an array of i915_gem_exec_fence structs which containe a DRM syncobj and a flags parameter which specifies whether to wait on it or to signal it. This implementation theoretically allows for both flags to be set in which case it waits on the dma_fence that was in the syncobj and then immediately replaces it with the dma_fence from the current execbuf. v2: - Rebase on new syncobj API v3: - Pull everything out into helpers - Do all allocation in gem_execbuffer2 - Pack the flags in the bottom 2 bits of the drm_syncobj* v4: - Prevent a potential race on syncobj->fence Testcase: igt/gem_exec_fence/syncobj* Signed-off-by: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/1499289202-25441-1-git-send-email-jason.ekstrand@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170815145733.4562-1-chris@chris-wilson.co.uk --- include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index ce3833fa1e06..6598fb76d2c2 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -435,6 +435,11 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + typedef struct drm_i915_getparam { __s32 param; /* @@ -816,6 +821,17 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -830,7 +846,11 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -931,7 +951,14 @@ struct drm_i915_gem_execbuffer2 { * element). */ #define I915_EXEC_BATCH_FIRST (1<<18) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit From dee83046e73cb7ebbbae955c1ef0f4f55a0f44f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:51:02 -0400 Subject: NFS: Remove unuse function nfs_page_group_lock_wait() Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d67b67ae6c8b..de1d24cedaa2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -140,7 +140,6 @@ extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool); -extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit From 1344b7ea172b4911a8ee8a6ff26c5bc6b5abb302 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Jul 2017 10:54:14 -0400 Subject: NFS: Remove unused parameter from nfs_page_group_lock() nfs_page_group_lock() is now always called with the 'nonblock' parameter set to 'false'. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index de1d24cedaa2..2f4fdafb6746 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,7 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock(struct nfs_page *, bool); +extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit From e824f99adaaf1ed0e03eac8574599af6d992163d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 11:53:49 -0400 Subject: NFSv4: Use a mutex to protect the per-inode commit lists The commit lists can get very large, so using the inode->i_lock can end up affecting general metadata performance. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5cc91d6381a3..121a702888b4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,6 +163,7 @@ struct nfs_inode { /* Readers: in-flight sillydelete RPC calls */ /* Writers: rmdir */ struct rw_semaphore rmdir_sem; + struct mutex commit_mutex; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; -- cgit From a6b6d5b85abf4914bbceade5dddd54c345c64136 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 15:39:46 -0400 Subject: NFS: Use an atomic_long_t to count the number of requests Rather than forcing us to take the inode->i_lock just in order to bump the number. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 121a702888b4..238fdc4c46df 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -154,7 +154,7 @@ struct nfs_inode { */ __be32 cookieverf[2]; - unsigned long nrequests; + atomic_long_t nrequests; struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ @@ -511,7 +511,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); static inline int nfs_have_writebacks(struct inode *inode) { - return NFS_I(inode)->nrequests != 0; + return atomic_long_read(&NFS_I(inode)->nrequests) != 0; } /* -- cgit From 5cb953d4b1e70a09084f71594c45d47458346bc2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Aug 2017 17:04:12 -0400 Subject: NFS: Use an atomic_long_t to count the number of commits Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 62cbcb842f99..164d5359d4ab 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1476,7 +1476,7 @@ struct nfs_pgio_header { struct nfs_mds_commit_info { atomic_t rpcs_out; - unsigned long ncommit; + atomic_long_t ncommit; struct list_head list; }; -- cgit From 5d2405227a9eaea48e8cc95756a06d407b11f141 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 4 Aug 2017 13:19:17 -0700 Subject: lib: Add xxhash module Adds xxhash kernel module with xxh32 and xxh64 hashes. xxhash is an extremely fast non-cryptographic hash algorithm for checksumming. The zstd compression and decompression modules added in the next patch require xxhash. I extracted it out from zstd since it is useful on its own. I copied the code from the upstream XXHash source repository and translated it into kernel style. I ran benchmarks and tests in the kernel and tests in userland. I benchmarked xxhash as a special character device. I ran in four modes, no-op, xxh32, xxh64, and crc32. The no-op mode simply copies the data to kernel space and ignores it. The xxh32, xxh64, and crc32 modes compute hashes on the copied data. I also ran it with four different buffer sizes. The benchmark file is located in the upstream zstd source repository under `contrib/linux-kernel/xxhash_test.c` [1]. I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD. I benchmarked using the file `filesystem.squashfs` from `ubuntu-16.10-desktop-amd64.iso`, which is 1,536,217,088 B large. Run the following commands for the benchmark: modprobe xxhash_test mknod xxhash_test c 245 0 time cp filesystem.squashfs xxhash_test The time is reported by the time of the userland `cp`. The GB/s is computed with 1,536,217,008 B / time(buffer size, hash) which includes the time to copy from userland. The Normalized GB/s is computed with 1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)). | Buffer Size (B) | Hash | Time (s) | GB/s | Adjusted GB/s | |-----------------|-------|----------|------|---------------| | 1024 | none | 0.408 | 3.77 | - | | 1024 | xxh32 | 0.649 | 2.37 | 6.37 | | 1024 | xxh64 | 0.542 | 2.83 | 11.46 | | 1024 | crc32 | 1.290 | 1.19 | 1.74 | | 4096 | none | 0.380 | 4.04 | - | | 4096 | xxh32 | 0.645 | 2.38 | 5.79 | | 4096 | xxh64 | 0.500 | 3.07 | 12.80 | | 4096 | crc32 | 1.168 | 1.32 | 1.95 | | 8192 | none | 0.351 | 4.38 | - | | 8192 | xxh32 | 0.614 | 2.50 | 5.84 | | 8192 | xxh64 | 0.464 | 3.31 | 13.60 | | 8192 | crc32 | 1.163 | 1.32 | 1.89 | | 16384 | none | 0.346 | 4.43 | - | | 16384 | xxh32 | 0.590 | 2.60 | 6.30 | | 16384 | xxh64 | 0.466 | 3.30 | 12.80 | | 16384 | crc32 | 1.183 | 1.30 | 1.84 | Tested in userland using the test-suite in the zstd repo under `contrib/linux-kernel/test/XXHashUserlandTest.cpp` [2] by mocking the kernel functions. A line in each branch of every function in `xxhash.c` was commented out to ensure that the test-suite fails. Additionally tested while testing zstd and with SMHasher [3]. [1] https://phabricator.intern.facebook.com/P57526246 [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/XXHashUserlandTest.cpp [3] https://github.com/aappleby/smhasher zstd source repository: https://github.com/facebook/zstd XXHash source repository: https://github.com/cyan4973/xxhash Signed-off-by: Nick Terrell Signed-off-by: Chris Mason --- include/linux/xxhash.h | 236 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 include/linux/xxhash.h (limited to 'include') diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h new file mode 100644 index 000000000000..9e1f42cb57e9 --- /dev/null +++ b/include/linux/xxhash.h @@ -0,0 +1,236 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (C) 2012-2016, Yann Collet. + * + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + * + * You can contact the author at: + * - xxHash homepage: http://cyan4973.github.io/xxHash/ + * - xxHash source repository: https://github.com/Cyan4973/xxHash + */ + +/* + * Notice extracted from xxHash homepage: + * + * xxHash is an extremely fast Hash algorithm, running at RAM speed limits. + * It also successfully passes all tests from the SMHasher suite. + * + * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 + * Duo @3GHz) + * + * Name Speed Q.Score Author + * xxHash 5.4 GB/s 10 + * CrapWow 3.2 GB/s 2 Andrew + * MumurHash 3a 2.7 GB/s 10 Austin Appleby + * SpookyHash 2.0 GB/s 10 Bob Jenkins + * SBox 1.4 GB/s 9 Bret Mulvey + * Lookup3 1.2 GB/s 9 Bob Jenkins + * SuperFastHash 1.2 GB/s 1 Paul Hsieh + * CityHash64 1.05 GB/s 10 Pike & Alakuijala + * FNV 0.55 GB/s 5 Fowler, Noll, Vo + * CRC32 0.43 GB/s 9 + * MD5-32 0.33 GB/s 10 Ronald L. Rivest + * SHA1-32 0.28 GB/s 10 + * + * Q.Score is a measure of quality of the hash function. + * It depends on successfully passing SMHasher test set. + * 10 is a perfect score. + * + * A 64-bits version, named xxh64 offers much better speed, + * but for 64-bits applications only. + * Name Speed on 64 bits Speed on 32 bits + * xxh64 13.8 GB/s 1.9 GB/s + * xxh32 6.8 GB/s 6.0 GB/s + */ + +#ifndef XXHASH_H +#define XXHASH_H + +#include + +/*-**************************** + * Simple Hash Functions + *****************************/ + +/** + * xxh32() - calculate the 32-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + * + * Return: The 32-bit hash of the data. + */ +uint32_t xxh32(const void *input, size_t length, uint32_t seed); + +/** + * xxh64() - calculate the 64-bit hash of the input with a given seed. + * + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems. + * + * Return: The 64-bit hash of the data. + */ +uint64_t xxh64(const void *input, size_t length, uint64_t seed); + +/*-**************************** + * Streaming Hash Functions + *****************************/ + +/* + * These definitions are only meant to allow allocation of XXH state + * statically, on stack, or in a struct for example. + * Do not use members directly. + */ + +/** + * struct xxh32_state - private xxh32 state, do not use members directly + */ +struct xxh32_state { + uint32_t total_len_32; + uint32_t large_len; + uint32_t v1; + uint32_t v2; + uint32_t v3; + uint32_t v4; + uint32_t mem32[4]; + uint32_t memsize; +}; + +/** + * struct xxh32_state - private xxh64 state, do not use members directly + */ +struct xxh64_state { + uint64_t total_len; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t mem64[4]; + uint32_t memsize; +}; + +/** + * xxh32_reset() - reset the xxh32 state to start a new hashing operation + * + * @state: The xxh32 state to reset. + * @seed: Initialize the hash state with this seed. + * + * Call this function on any xxh32_state to prepare for a new hashing operation. + */ +void xxh32_reset(struct xxh32_state *state, uint32_t seed); + +/** + * xxh32_update() - hash the data given and update the xxh32 state + * + * @state: The xxh32 state to update. + * @input: The data to hash. + * @length: The length of the data to hash. + * + * After calling xxh32_reset() call xxh32_update() as many times as necessary. + * + * Return: Zero on success, otherwise an error code. + */ +int xxh32_update(struct xxh32_state *state, const void *input, size_t length); + +/** + * xxh32_digest() - produce the current xxh32 hash + * + * @state: Produce the current xxh32 hash of this state. + * + * A hash value can be produced at any time. It is still possible to continue + * inserting input into the hash state after a call to xxh32_digest(), and + * generate new hashes later on, by calling xxh32_digest() again. + * + * Return: The xxh32 hash stored in the state. + */ +uint32_t xxh32_digest(const struct xxh32_state *state); + +/** + * xxh64_reset() - reset the xxh64 state to start a new hashing operation + * + * @state: The xxh64 state to reset. + * @seed: Initialize the hash state with this seed. + */ +void xxh64_reset(struct xxh64_state *state, uint64_t seed); + +/** + * xxh64_update() - hash the data given and update the xxh64 state + * @state: The xxh64 state to update. + * @input: The data to hash. + * @length: The length of the data to hash. + * + * After calling xxh64_reset() call xxh64_update() as many times as necessary. + * + * Return: Zero on success, otherwise an error code. + */ +int xxh64_update(struct xxh64_state *state, const void *input, size_t length); + +/** + * xxh64_digest() - produce the current xxh64 hash + * + * @state: Produce the current xxh64 hash of this state. + * + * A hash value can be produced at any time. It is still possible to continue + * inserting input into the hash state after a call to xxh64_digest(), and + * generate new hashes later on, by calling xxh64_digest() again. + * + * Return: The xxh64 hash stored in the state. + */ +uint64_t xxh64_digest(const struct xxh64_state *state); + +/*-************************** + * Utils + ***************************/ + +/** + * xxh32_copy_state() - copy the source state into the destination state + * + * @src: The source xxh32 state. + * @dst: The destination xxh32 state. + */ +void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src); + +/** + * xxh64_copy_state() - copy the source state into the destination state + * + * @src: The source xxh64 state. + * @dst: The destination xxh64 state. + */ +void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src); + +#endif /* XXHASH_H */ -- cgit From 73f3d1b48f5069d46ba48aa28c2898dc93185560 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 9 Aug 2017 19:35:53 -0700 Subject: lib: Add zstd modules Add zstd compression and decompression kernel modules. zstd offers a wide varity of compression speed and quality trade-offs. It can compress at speeds approaching lz4, and quality approaching lzma. zstd decompressions at speeds more than twice as fast as zlib, and decompression speed remains roughly the same across all compression levels. The code was ported from the upstream zstd source repository. The `linux/zstd.h` header was modified to match linux kernel style. The cross-platform and allocation code was stripped out. Instead zstd requires the caller to pass a preallocated workspace. The source files were clang-formatted [1] to match the Linux Kernel style as much as possible. Otherwise, the code was unmodified. We would like to avoid as much further manual modification to the source code as possible, so it will be easier to keep the kernel zstd up to date. I benchmarked zstd compression as a special character device. I ran zstd and zlib compression at several levels, as well as performing no compression, which measure the time spent copying the data to kernel space. Data is passed to the compresser 4096 B at a time. The benchmark file is located in the upstream zstd source repository under `contrib/linux-kernel/zstd_compress_test.c` [2]. I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD. I benchmarked using `silesia.tar` [3], which is 211,988,480 B large. Run the following commands for the benchmark: sudo modprobe zstd_compress_test sudo mknod zstd_compress_test c 245 0 sudo cp silesia.tar zstd_compress_test The time is reported by the time of the userland `cp`. The MB/s is computed with 1,536,217,008 B / time(buffer size, hash) which includes the time to copy from userland. The Adjusted MB/s is computed with 1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)). The memory reported is the amount of memory the compressor requests. | Method | Size (B) | Time (s) | Ratio | MB/s | Adj MB/s | Mem (MB) | |----------|----------|----------|-------|---------|----------|----------| | none | 11988480 | 0.100 | 1 | 2119.88 | - | - | | zstd -1 | 73645762 | 1.044 | 2.878 | 203.05 | 224.56 | 1.23 | | zstd -3 | 66988878 | 1.761 | 3.165 | 120.38 | 127.63 | 2.47 | | zstd -5 | 65001259 | 2.563 | 3.261 | 82.71 | 86.07 | 2.86 | | zstd -10 | 60165346 | 13.242 | 3.523 | 16.01 | 16.13 | 13.22 | | zstd -15 | 58009756 | 47.601 | 3.654 | 4.45 | 4.46 | 21.61 | | zstd -19 | 54014593 | 102.835 | 3.925 | 2.06 | 2.06 | 60.15 | | zlib -1 | 77260026 | 2.895 | 2.744 | 73.23 | 75.85 | 0.27 | | zlib -3 | 72972206 | 4.116 | 2.905 | 51.50 | 52.79 | 0.27 | | zlib -6 | 68190360 | 9.633 | 3.109 | 22.01 | 22.24 | 0.27 | | zlib -9 | 67613382 | 22.554 | 3.135 | 9.40 | 9.44 | 0.27 | I benchmarked zstd decompression using the same method on the same machine. The benchmark file is located in the upstream zstd repo under `contrib/linux-kernel/zstd_decompress_test.c` [4]. The memory reported is the amount of memory required to decompress data compressed with the given compression level. If you know the maximum size of your input, you can reduce the memory usage of decompression irrespective of the compression level. | Method | Time (s) | MB/s | Adjusted MB/s | Memory (MB) | |----------|----------|---------|---------------|-------------| | none | 0.025 | 8479.54 | - | - | | zstd -1 | 0.358 | 592.15 | 636.60 | 0.84 | | zstd -3 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -5 | 0.396 | 535.32 | 571.40 | 1.46 | | zstd -10 | 0.374 | 566.81 | 607.42 | 2.51 | | zstd -15 | 0.379 | 559.34 | 598.84 | 4.61 | | zstd -19 | 0.412 | 514.54 | 547.77 | 8.80 | | zlib -1 | 0.940 | 225.52 | 231.68 | 0.04 | | zlib -3 | 0.883 | 240.08 | 247.07 | 0.04 | | zlib -6 | 0.844 | 251.17 | 258.84 | 0.04 | | zlib -9 | 0.837 | 253.27 | 287.64 | 0.04 | Tested in userland using the test-suite in the zstd repo under `contrib/linux-kernel/test/UserlandTest.cpp` [5] by mocking the kernel functions. Fuzz tested using libfuzzer [6] with the fuzz harnesses under `contrib/linux-kernel/test/{RoundTripCrash.c,DecompressCrash.c}` [7] [8] with ASAN, UBSAN, and MSAN. Additionaly, it was tested while testing the BtrFS and SquashFS patches coming next. [1] https://clang.llvm.org/docs/ClangFormat.html [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/zstd_compress_test.c [3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia [4] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/zstd_decompress_test.c [5] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/UserlandTest.cpp [6] http://llvm.org/docs/LibFuzzer.html [7] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/RoundTripCrash.c [8] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/test/DecompressCrash.c zstd source repository: https://github.com/facebook/zstd Signed-off-by: Nick Terrell Signed-off-by: Chris Mason --- include/linux/zstd.h | 1157 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1157 insertions(+) create mode 100644 include/linux/zstd.h (limited to 'include') diff --git a/include/linux/zstd.h b/include/linux/zstd.h new file mode 100644 index 000000000000..249575e2485f --- /dev/null +++ b/include/linux/zstd.h @@ -0,0 +1,1157 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd. + * An additional grant of patent rights can be found in the PATENTS file in the + * same directory. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. This program is dual-licensed; you may select + * either version 2 of the GNU General Public License ("GPL") or BSD license + * ("BSD"). + */ + +#ifndef ZSTD_H +#define ZSTD_H + +/* ====== Dependency ======*/ +#include /* size_t */ + + +/*-***************************************************************************** + * Introduction + * + * zstd, short for Zstandard, is a fast lossless compression algorithm, + * targeting real-time compression scenarios at zlib-level and better + * compression ratios. The zstd compression library provides in-memory + * compression and decompression functions. The library supports compression + * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled + * ultra, should be used with caution, as they require more memory. + * Compression can be done in: + * - a single step, reusing a context (described as Explicit memory management) + * - unbounded multiple steps (described as Streaming compression) + * The compression ratio achievable on small data can be highly improved using + * compression with a dictionary in: + * - a single step (described as Simple dictionary API) + * - a single step, reusing a dictionary (described as Fast dictionary API) + ******************************************************************************/ + +/*====== Helper functions ======*/ + +/** + * enum ZSTD_ErrorCode - zstd error codes + * + * Functions that return size_t can be checked for errors using ZSTD_isError() + * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + */ +typedef enum { + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_version_unsupported, + ZSTD_error_parameter_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_frameParameter_windowTooLarge, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_checksum_wrong, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, + ZSTD_error_dictionaryCreation_failed, + ZSTD_error_maxCode +} ZSTD_ErrorCode; + +/** + * ZSTD_maxCLevel() - maximum compression level available + * + * Return: Maximum compression level available. + */ +int ZSTD_maxCLevel(void); +/** + * ZSTD_compressBound() - maximum compressed size in worst case scenario + * @srcSize: The size of the data to compress. + * + * Return: The maximum compressed size in the worst case scenario. + */ +size_t ZSTD_compressBound(size_t srcSize); +/** + * ZSTD_isError() - tells if a size_t function result is an error code + * @code: The function result to check for error. + * + * Return: Non-zero iff the code is an error. + */ +static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) +{ + return code > (size_t)-ZSTD_error_maxCode; +} +/** + * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode + * @functionResult: The result of a function for which ZSTD_isError() is true. + * + * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 + * if the functionResult isn't an error. + */ +static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( + size_t functionResult) +{ + if (!ZSTD_isError(functionResult)) + return (ZSTD_ErrorCode)0; + return (ZSTD_ErrorCode)(0 - functionResult); +} + +/** + * enum ZSTD_strategy - zstd compression search strategy + * + * From faster to stronger. + */ +typedef enum { + ZSTD_fast, + ZSTD_dfast, + ZSTD_greedy, + ZSTD_lazy, + ZSTD_lazy2, + ZSTD_btlazy2, + ZSTD_btopt, + ZSTD_btopt2 +} ZSTD_strategy; + +/** + * struct ZSTD_compressionParameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, slower, + * and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + */ +typedef struct { + unsigned int windowLog; + unsigned int chainLog; + unsigned int hashLog; + unsigned int searchLog; + unsigned int searchLength; + unsigned int targetLength; + ZSTD_strategy strategy; +} ZSTD_compressionParameters; + +/** + * struct ZSTD_frameParameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the frame + * header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the end + * of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame header + * when using dictionary compression. + * + * The default value is all fields set to 0. + */ +typedef struct { + unsigned int contentSizeFlag; + unsigned int checksumFlag; + unsigned int noDictIDFlag; +} ZSTD_frameParameters; + +/** + * struct ZSTD_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +/** + * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * Return: The selected ZSTD_compressionParameters. + */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); + +/** + * ZSTD_getParams() - returns ZSTD_parameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * The same as ZSTD_getCParams() except also selects the default frame + * parameters (all zero). + * + * Return: The selected ZSTD_parameters. + */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); + +/*-************************************* + * Explicit memory management + **************************************/ + +/** + * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx + * @cParams: The compression parameters to be used for compression. + * + * If multiple compression parameters might be used, the caller must call + * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initCCtx(). + */ +size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); + +/** + * struct ZSTD_CCtx - the zstd compression context + * + * When compressing many times it is recommended to allocate a context just once + * and reuse it for each successive compression operation. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +/** + * ZSTD_initCCtx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A compression context emplaced into workspace. + */ +ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); + +/** + * ZSTD_compressCCtx() - compress src into dst + * @ctx: The context. Must have been initialized with a workspace at + * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, ZSTD_parameters params); + +/** + * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initDCtx(). + */ +size_t ZSTD_DCtxWorkspaceBound(void); + +/** + * struct ZSTD_DCtx - the zstd decompression context + * + * When decompressing many times it is recommended to allocate a context just + * once and reuse it for each successive decompression operation. + */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +/** + * ZSTD_initDCtx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A decompression context emplaced into workspace. + */ +ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); + +/** + * ZSTD_decompressDCtx() - decompress zstd compressed src into dst + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); + +/*-************************ + * Simple dictionary API + **************************/ + +/** + * ZSTD_compress_usingDict() - compress src into dst using a dictionary + * @ctx: The context. Must have been initialized with a workspace at + * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @dict: The dictionary to use for compression. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a predefined dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize, + ZSTD_parameters params); + +/** + * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @dict: The dictionary to use for decompression. The same dictionary + * must've been used to compress the data. + * @dictSize: The size of the dictionary. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize); + +/*-************************** + * Fast dictionary API + ***************************/ + +/** + * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initCDict(). + */ +size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); + +/** + * struct ZSTD_CDict - a digested dictionary to be used for compression + */ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/** + * ZSTD_initCDict() - initialize a digested dictionary for compression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_CDict so it must outlive the returned ZSTD_CDict. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * @workspace: The workspace. It must outlive the returned ZSTD_CDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_CDictWorkspaceBound(params.cParams). + * + * When compressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_CDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_CDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize, + ZSTD_parameters params, void *workspace, size_t workspaceSize); + +/** + * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict + * @ctx: The context. Must have been initialized with a workspace at + * least as large as ZSTD_CCtxWorkspaceBound(cParams) where + * cParams are the compression parameters used to initialize the + * cdict. + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @cdict: The digested dictionary to use for compression. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a digested dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const ZSTD_CDict *cdict); + + +/** + * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initDDict(). + */ +size_t ZSTD_DDictWorkspaceBound(void); + +/** + * struct ZSTD_DDict - a digested dictionary to be used for decompression + */ +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/** + * ZSTD_initDDict() - initialize a digested dictionary for decompression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_DDict so it must outlive the returned ZSTD_DDict. + * @dictSize: The size of the dictionary. + * @workspace: The workspace. It must outlive the returned ZSTD_DDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_DDictWorkspaceBound(). + * + * When decompressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_DDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_DDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, + void *workspace, size_t workspaceSize); + +/** + * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @ddict: The digested dictionary to use for decompression. The same + * dictionary must've been used to compress the data. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, + size_t dstCapacity, const void *src, size_t srcSize, + const ZSTD_DDict *ddict); + + +/*-************************** + * Streaming + ***************************/ + +/** + * struct ZSTD_inBuffer - input buffer for streaming + * @src: Start of the input buffer. + * @size: Size of the input buffer. + * @pos: Position where reading stopped. Will be updated. + * Necessarily 0 <= pos <= size. + */ +typedef struct ZSTD_inBuffer_s { + const void *src; + size_t size; + size_t pos; +} ZSTD_inBuffer; + +/** + * struct ZSTD_outBuffer - output buffer for streaming + * @dst: Start of the output buffer. + * @size: Size of the output buffer. + * @pos: Position where writing stopped. Will be updated. + * Necessarily 0 <= pos <= size. + */ +typedef struct ZSTD_outBuffer_s { + void *dst; + size_t size; + size_t pos; +} ZSTD_outBuffer; + + + +/*-***************************************************************************** + * Streaming compression - HowTo + * + * A ZSTD_CStream object is required to track streaming operation. + * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. + * ZSTD_CStream objects can be reused multiple times on consecutive compression + * operations. It is recommended to re-use ZSTD_CStream in situations where many + * streaming operations will be achieved consecutively. Use one separate + * ZSTD_CStream per thread for parallel execution. + * + * Use ZSTD_compressStream() repetitively to consume input stream. + * The function will automatically update both `pos` fields. + * Note that it may not consume the entire input, in which case `pos < size`, + * and it's up to the caller to present again remaining data. + * It returns a hint for the preferred number of bytes to use as an input for + * the next function call. + * + * At any moment, it's possible to flush whatever data remains within internal + * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might + * still be some content left within the internal buffer if `output->size` is + * too small. It returns the number of bytes left in the internal buffer and + * must be called until it returns 0. + * + * ZSTD_endStream() instructs to finish a frame. It will perform a flush and + * write frame epilogue. The epilogue is required for decoders to consider a + * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush + * the full content if `output->size` is too small. In which case, call again + * ZSTD_endStream() to complete the flush. It returns the number of bytes left + * in the internal buffer and must be called until it returns 0. + ******************************************************************************/ + +/** + * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). + */ +size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); + +/** + * struct ZSTD_CStream - the zstd streaming compression context + */ +typedef struct ZSTD_CStream_s ZSTD_CStream; + +/*===== ZSTD_CStream management functions =====*/ +/** + * ZSTD_initCStream() - initialize a zstd streaming compression context + * @params: The zstd compression parameters. + * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must + * pass the source size (zero means empty source). Otherwise, + * the caller may optionally pass the source size, or zero if + * unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/** + * ZSTD_initCStream_usingCDict() - initialize a streaming compression context + * @cdict: The digested dictionary to use for compression. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() + * with the cParams used to initialize the cdict to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/*===== Streaming compression functions =====*/ +/** + * ZSTD_resetCStream() - reset the context using parameters from creation + * @zcs: The zstd streaming compression context to reset. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +/** + * ZSTD_compressStream() - streaming compress some of input into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input->pos < input->size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); +/** + * ZSTD_flushStream() - flush internal buffers into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_flushStream() must be called until it returns 0, meaning all the data + * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +/** + * ZSTD_endStream() - flush internal buffers into output and end the frame + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_endStream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); + +/** + * ZSTD_CStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_CStreamInSize(void); +/** + * ZSTD_CStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete compressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_CStreamOutSize(void); + + + +/*-***************************************************************************** + * Streaming decompression - HowTo + * + * A ZSTD_DStream object is required to track streaming operations. + * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. + * ZSTD_DStream objects can be re-used multiple times. + * + * Use ZSTD_decompressStream() repetitively to consume your input. + * The function will update both `pos` fields. + * If `input->pos < input->size`, some input has not been consumed. + * It's up to the caller to present again remaining data. + * If `output->pos < output->size`, decoder has flushed everything it could. + * Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise it returns a suggested next input size that will never load more + * than the current frame. + ******************************************************************************/ + +/** + * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream + * @maxWindowSize: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). + */ +size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); + +/** + * struct ZSTD_DStream - the zstd streaming decompression context + */ +typedef struct ZSTD_DStream_s ZSTD_DStream; +/*===== ZSTD_DStream management functions =====*/ +/** + * ZSTD_initDStream() - initialize a zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, + size_t workspaceSize); +/** + * ZSTD_initDStream_usingDDict() - initialize streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @ddict: The digested dictionary to use for decompression. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, + const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); + +/*===== Streaming decompression functions =====*/ +/** + * ZSTD_resetDStream() - reset the context using parameters from creation + * @zds: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetDStream(ZSTD_DStream *zds); +/** + * ZSTD_decompressStream() - streaming decompress some of input into output + * @zds: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * ZSTD_decompressStream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the input + * for the next function call or an error, which can be checked using + * ZSTD_isError(). The size hint will never load more than the frame. + */ +size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); + +/** + * ZSTD_DStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_DStreamInSize(void); +/** + * ZSTD_DStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete decompressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_DStreamOutSize(void); + + +/* --- Constants ---*/ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U + +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +#define ZSTD_WINDOWLOG_MAX_32 27 +#define ZSTD_WINDOWLOG_MAX_64 27 +#define ZSTD_WINDOWLOG_MAX \ + ((unsigned int)(sizeof(size_t) == 4 \ + ? ZSTD_WINDOWLOG_MAX_32 \ + : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_HASHLOG3_MAX 17 +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +/* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_SEARCHLENGTH_MAX 7 +/* only for ZSTD_btopt, other strategies are limited to 4 */ +#define ZSTD_SEARCHLENGTH_MIN 3 +#define ZSTD_TARGETLENGTH_MIN 4 +#define ZSTD_TARGETLENGTH_MAX 999 + +/* for static allocation */ +#define ZSTD_FRAMEHEADERSIZE_MAX 18 +#define ZSTD_FRAMEHEADERSIZE_MIN 6 +static const size_t ZSTD_frameHeaderSize_prefix = 5; +static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; +static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; +/* magic number + skippable frame length */ +static const size_t ZSTD_skippableHeaderSize = 8; + + +/*-************************************* + * Compressed size functions + **************************************/ + +/** + * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded frame + * or a skippable frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * size of the frame. + * + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be check with ZSTD_isError(). + * Suitable to pass to ZSTD_decompress() or similar functions. + */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); + +/*-************************************* + * Decompressed size functions + **************************************/ +/** + * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header + * @src: It should point to the start of a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The frame content size stored in the frame header if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the + * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. + */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/** + * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames + * @src: It should point to the start of a series of zstd encoded and/or + * skippable frames. + * @srcSize: The exact size of the series of frames. + * + * If any zstd encoded frame in the series doesn't have the frame content size + * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always + * set when using ZSTD_compress(). The decompressed size can be very large. + * If the source is untrusted, the decompressed size could be wrong or + * intentionally modified. Always ensure the result fits within the + * application's authorized limits. ZSTD_findDecompressedSize() handles multiple + * frames, and so it must traverse the input to read each frame header. This is + * efficient as most of the data is skipped, however it does mean that all frame + * data must be present and valid. + * + * Return: Decompressed size of all the data contained in the frames if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. + * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. + */ +unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); + +/*-************************************* + * Advanced compression functions + **************************************/ +/** + * ZSTD_checkCParams() - ensure parameter values remain within authorized range + * @cParams: The zstd compression parameters. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); + +/** + * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize + * @srcSize: Optionally the estimated source size, or zero if unknown. + * @dictSize: Optionally the estimated dictionary size, or zero if unknown. + * + * Return: The optimized parameters. + */ +ZSTD_compressionParameters ZSTD_adjustCParams( + ZSTD_compressionParameters cParams, unsigned long long srcSize, + size_t dictSize); + +/*--- Advanced decompression functions ---*/ + +/** + * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame + * @buffer: The source buffer to check. + * @size: The size of the source buffer, must be at least 4 bytes. + * + * Return: True iff the buffer starts with a zstd or skippable frame identifier. + */ +unsigned int ZSTD_isFrame(const void *buffer, size_t size); + +/** + * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary + * @dict: The dictionary buffer. + * @dictSize: The size of the dictionary buffer. + * + * Return: The dictionary id stored within the dictionary or 0 if the + * dictionary is not a zstd dictionary. If it returns 0 the + * dictionary can still be loaded as a content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); + +/** + * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict + * @ddict: The ddict to find the id of. + * + * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not + * a zstd dictionary. If it returns 0 `ddict` will be loaded as a + * content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); + +/** + * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame + * @src: Source buffer. It must be a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The dictionary id required to decompress the frame stored within + * `src` or 0 if the dictionary id could not be decoded. It can return + * 0 if the frame does not require a dictionary, the dictionary id + * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` + * is too small. + */ +unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); + +/** + * struct ZSTD_frameParams - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or 0 if not present. + * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @dictID: The dictionary id, or 0 if not present. + * @checksumFlag: Whether a checksum was used. + */ +typedef struct { + unsigned long long frameContentSize; + unsigned int windowSize; + unsigned int dictID; + unsigned int checksumFlag; +} ZSTD_frameParams; + +/** + * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame + * @fparamsPtr: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is + * always large enough to succeed. + * + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, + size_t srcSize); + +/*-***************************************************************************** + * Buffer-less and synchronous inner streaming functions + * + * This is an advanced API, giving full control over buffer management, for + * users which need direct control over memory. + * But it's also a complex one, with many restrictions (documented below). + * Prefer using normal streaming API for an easier experience + ******************************************************************************/ + +/*-***************************************************************************** + * Buffer-less streaming compression (synchronous mode) + * + * A ZSTD_CCtx object is required to track streaming operations. + * Use ZSTD_initCCtx() to initialize a context. + * ZSTD_CCtx object can be re-used multiple times within successive compression + * operations. + * + * Start by initializing a context. + * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary + * compression, + * or ZSTD_compressBegin_advanced(), for finer parameter control. + * It's also possible to duplicate a reference context which has already been + * initialized, using ZSTD_copyCCtx() + * + * Then, consume your input using ZSTD_compressContinue(). + * There are some important considerations to keep in mind when using this + * advanced function : + * - ZSTD_compressContinue() has no internal buffer. It uses externally provided + * buffer only. + * - Interface is synchronous : input is consumed entirely and produce 1+ + * (or more) compressed blocks. + * - Caller must ensure there is enough space in `dst` to store compressed data + * under worst case scenario. Worst case evaluation is provided by + * ZSTD_compressBound(). + * ZSTD_compressContinue() doesn't guarantee recover after a failed + * compression. + * - ZSTD_compressContinue() presumes prior input ***is still accessible and + * unmodified*** (up to maximum distance size, see WindowLog). + * It remembers all previous contiguous blocks, plus one separated memory + * segment (which can itself consists of multiple contiguous blocks) + * - ZSTD_compressContinue() detects that prior input has been overwritten when + * `src` buffer overlaps. In which case, it will "discard" the relevant memory + * section from its history. + * + * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) + * and optional checksum. It's possible to use srcSize==0, in which case, it + * will write a final empty block to end the frame. Without last block mark, + * frames will be considered unfinished (corrupted) by decoders. + * + * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new + * frame. + ******************************************************************************/ + +/*===== Buffer-less streaming compression functions =====*/ +size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, int compressionLevel); +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, ZSTD_parameters params, + unsigned long long pledgedSrcSize); +size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); + + + +/*-***************************************************************************** + * Buffer-less streaming decompression (synchronous mode) + * + * A ZSTD_DCtx object is required to track streaming operations. + * Use ZSTD_initDCtx() to initialize a context. + * A ZSTD_DCtx object can be re-used multiple times. + * + * First typical operation is to retrieve frame parameters, using + * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide + * important information to correctly decode the frame, such as the minimum + * rolling buffer size to allocate to decompress data (`windowSize`), and the + * dictionary ID used. + * Note: content size is optional, it may not be present. 0 means unknown. + * Note that these values could be wrong, either because of data malformation, + * or because an attacker is spoofing deliberate false information. As a + * consequence, check that values remain within valid application range, + * especially `windowSize`, before allocation. Each application can set its own + * limit, depending on local restrictions. For extended interoperability, it is + * recommended to support at least 8 MB. + * Frame parameters are extracted from the beginning of the compressed frame. + * Data fragment must be large enough to ensure successful decoding, typically + * `ZSTD_frameHeaderSize_max` bytes. + * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. + * >0: `srcSize` is too small, provide at least this many bytes. + * errorCode, which can be tested using ZSTD_isError(). + * + * Start decompression, with ZSTD_decompressBegin() or + * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared + * context, using ZSTD_copyDCtx(). + * + * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() + * alternatively. + * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' + * to ZSTD_decompressContinue(). + * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will + * fail. + * + * The result of ZSTD_decompressContinue() is the number of bytes regenerated + * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an + * error; it just means ZSTD_decompressContinue() has decoded some metadata + * item. It can also be an error code, which can be tested with ZSTD_isError(). + * + * ZSTD_decompressContinue() needs previous data blocks during decompression, up + * to `windowSize`. They should preferably be located contiguously, prior to + * current block. Alternatively, a round buffer of sufficient size is also + * possible. Sufficient size is determined by frame parameters. + * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't + * follow each other, make sure that either the compressor breaks contiguity at + * the same place, or that previous contiguous segment is large enough to + * properly handle maximum back-reference. + * + * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + * Context can then be reset to start a new decompression. + * + * Note: it's possible to know if next input to present is a header or a block, + * using ZSTD_nextInputType(). This information is not required to properly + * decode a frame. + * + * == Special case: skippable frames == + * + * Skippable frames allow integration of user-defined data into a flow of + * concatenated frames. Skippable frames will be ignored (skipped) by a + * decompressor. The format of skippable frames is as follows: + * a) Skippable frame ID - 4 Bytes, Little endian format, any value from + * 0x184D2A50 to 0x184D2A5F + * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + * c) Frame Content - any content (User Data) of length equal to Frame Size + * For skippable frames ZSTD_decompressContinue() always returns 0. + * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 + * what means that a frame is skippable. + * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might + * actually be a zstd encoded frame with no content. For purposes of + * decompression, it is valid in both cases to skip the frame using + * ZSTD_findFrameCompressedSize() to find its size in bytes. + * It also returns frame size as fparamsPtr->frameContentSize. + ******************************************************************************/ + +/*===== Buffer-less streaming decompression functions =====*/ +size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, + size_t dictSize); +void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); +size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +typedef enum { + ZSTDnit_frameHeader, + ZSTDnit_blockHeader, + ZSTDnit_block, + ZSTDnit_lastBlock, + ZSTDnit_checksum, + ZSTDnit_skippableFrame +} ZSTD_nextInputType_e; +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); + +/*-***************************************************************************** + * Block functions + * + * Block functions produce and decode raw zstd blocks, without frame metadata. + * Frame metadata cost is typically ~18 bytes, which can be non-negligible for + * very small blocks (< 100 bytes). User will have to take in charge required + * information to regenerate data, such as compressed and content sizes. + * + * A few rules to respect: + * - Compressing and decompressing require a context structure + * + Use ZSTD_initCCtx() and ZSTD_initDCtx() + * - It is necessary to init context before starting + * + compression : ZSTD_compressBegin() + * + decompression : ZSTD_decompressBegin() + * + variants _usingDict() are also allowed + * + copyCCtx() and copyDCtx() work too + * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() + * + If you need to compress more, cut data into multiple blocks + * + Consider using the regular ZSTD_compress() instead, as frame metadata + * costs become negligible when source size is large. + * - When a block is considered not compressible enough, ZSTD_compressBlock() + * result will be zero. In which case, nothing is produced into `dst`. + * + User must test for such outcome and deal directly with uncompressed data + * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! + * + In case of multiple successive blocks, decoder must be informed of + * uncompressed block existence to follow proper history. Use + * ZSTD_insertBlock() in such a case. + ******************************************************************************/ + +/* Define for static allocation */ +#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) +/*===== Raw zstd block functions =====*/ +size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); +size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, + size_t blockSize); + +#endif /* ZSTD_H */ -- cgit From 5c1aab1dd5445ed8bdcdbb575abc1b0d7ee5b2e7 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 9 Aug 2017 19:39:02 -0700 Subject: btrfs: Add zstd support Add zstd compression and decompression support to BtrFS. zstd at its fastest level compresses almost as well as zlib, while offering much faster compression and decompression, approaching lzo speeds. I benchmarked btrfs with zstd compression against no compression, lzo compression, and zlib compression. I benchmarked two scenarios. Copying a set of files to btrfs, and then reading the files. Copying a tarball to btrfs, extracting it to btrfs, and then reading the extracted files. After every operation, I call `sync` and include the sync time. Between every pair of operations I unmount and remount the filesystem to avoid caching. The benchmark files can be found in the upstream zstd source repository under `contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}` [1] [2]. I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, 16 GB of RAM, and a SSD. The first compression benchmark is copying 10 copies of the unzipped Silesia corpus [3] into a BtrFS filesystem mounted with `-o compress-force=Method`. The decompression benchmark times how long it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is measured by comparing the output of `df` and `du`. See the benchmark file [1] for details. I benchmarked multiple zstd compression levels, although the patch uses zstd level 1. | Method | Ratio | Compression MB/s | Decompression speed | |---------|-------|------------------|---------------------| | None | 0.99 | 504 | 686 | | lzo | 1.66 | 398 | 442 | | zlib | 2.58 | 65 | 241 | | zstd 1 | 2.57 | 260 | 383 | | zstd 3 | 2.71 | 174 | 408 | | zstd 6 | 2.87 | 70 | 398 | | zstd 9 | 2.92 | 43 | 406 | | zstd 12 | 2.93 | 21 | 408 | | zstd 15 | 3.01 | 11 | 354 | The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it measures the compression ratio, extracts the tar, and deletes the tar. Then it measures the compression ratio again, and `tar`s the extracted files into `/dev/null`. See the benchmark file [2] for details. | Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) | |--------|-----------|---------------|----------|------------|----------| | None | 0.97 | 0.78 | 0.981 | 5.501 | 8.807 | | lzo | 2.06 | 1.38 | 1.631 | 8.458 | 8.585 | | zlib | 3.40 | 1.86 | 7.750 | 21.544 | 11.744 | | zstd 1 | 3.57 | 1.85 | 2.579 | 11.479 | 9.389 | [1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh [3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia [4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz zstd source repository: https://github.com/facebook/zstd Signed-off-by: Nick Terrell Signed-off-by: Chris Mason --- include/uapi/linux/btrfs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 9aa74f317747..378230c163d5 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args { #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) -/* - * some patches floated around with a second compression method - * lets save that incompat here for when they do get in - * Note we don't actually support it, we're just reserving the - * number - */ -#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) /* * older kernels tried to do bigger metadata blocks, but the -- cgit From e01d1913b0d0817191418381a6fcebaa01abde2a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:40 +0800 Subject: iommu: Add is_attach_deferred call-back to iommu-ops This new call-back will be used to check if the domain attach need be deferred for now. If yes, the domain attach/detach will return directly. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..63983c9e6c3a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -225,6 +225,7 @@ struct iommu_ops { u32 (*domain_get_windows)(struct iommu_domain *domain); int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev); unsigned long pgsize_bitmap; }; -- cgit From 42f87e71c3df12d8f29ec1bb7b47772ffaeaf1ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 14:44:28 +0200 Subject: iommu/iova: Add flush-queue data structures This patch adds the basic data-structures to implement flush-queues in the generic IOVA code. It also adds the initialization and destroy routines for these data structures. The initialization routine is designed so that the use of this feature is optional for the users of IOVA code. Signed-off-by: Joerg Roedel --- include/linux/iova.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index e0a892ae45c0..8aa10896150e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -36,6 +36,30 @@ struct iova_rcache { struct iova_cpu_rcache __percpu *cpu_rcaches; }; +struct iova_domain; + +/* Call-Back from IOVA code into IOMMU drivers */ +typedef void (* iova_flush_cb)(struct iova_domain *domain); + +/* Destructor for per-entry data */ +typedef void (* iova_entry_dtor)(unsigned long data); + +/* Number of entries per Flush Queue */ +#define IOVA_FQ_SIZE 256 + +/* Flush Queue entry for defered flushing */ +struct iova_fq_entry { + unsigned long iova_pfn; + unsigned long pages; + unsigned long data; +}; + +/* Per-CPU Flush Queue structure */ +struct iova_fq { + struct iova_fq_entry entries[IOVA_FQ_SIZE]; + unsigned head, tail; +}; + /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -45,6 +69,14 @@ struct iova_domain { unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ + + iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU + TLBs */ + + iova_entry_dtor entry_dtor; /* IOMMU driver specific destructor for + iova entry */ + + struct iova_fq __percpu *fq; /* Flush Queue */ }; static inline unsigned long iova_size(struct iova *iova) @@ -102,6 +134,8 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit); +int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, @@ -174,6 +208,13 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +static inline int init_iova_flush_queue(struct iova_domain *iovad, + iova_flush_cb flush_cb, + iova_entry_dtor entry_dtor) +{ + return -ENODEV; +} + static inline struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { -- cgit From 1928210107edd4fa786199fef6b875d3af3bef88 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 15:49:44 +0200 Subject: iommu/iova: Implement Flush-Queue ring buffer Add a function to add entries to the Flush-Queue ring buffer. If the buffer is full, call the flush-callback and free the entries. Signed-off-by: Joerg Roedel --- include/linux/iova.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 8aa10896150e..1ae85248ec50 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -127,6 +127,9 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned); void free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size); +void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, @@ -182,6 +185,12 @@ static inline void free_iova_fast(struct iova_domain *iovad, { } +static inline void queue_iova(struct iova_domain *iovad, + unsigned long pfn, unsigned long pages, + unsigned long data) +{ +} + static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn) -- cgit From fb418dab8a4f01dde0c025d15145c589ec02796b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:14:59 +0200 Subject: iommu/iova: Add flush counters to Flush-Queue implementation There are two counters: * fq_flush_start_cnt - Increased when a TLB flush is started. * fq_flush_finish_cnt - Increased when a TLB flush is finished. The fq_flush_start_cnt is assigned to every Flush-Queue entry on its creation. When freeing entries from the Flush-Queue, the value in the entry is compared to the fq_flush_finish_cnt. The entry can only be freed when its value is less than the value of fq_flush_finish_cnt. The reason for these counters it to take advantage of IOMMU TLB flushes that happened on other CPUs. These already flushed the TLB for Flush-Queue entries on other CPUs so that they can already be freed without flushing the TLB again. This makes it less likely that the Flush-Queue is full and saves IOMMU TLB flushes. Signed-off-by: Joerg Roedel --- include/linux/iova.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 1ae85248ec50..985b8008999e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -14,6 +14,7 @@ #include #include #include +#include #include /* iova structure */ @@ -52,6 +53,7 @@ struct iova_fq_entry { unsigned long iova_pfn; unsigned long pages; unsigned long data; + u64 counter; /* Flush counter when this entrie was added */ }; /* Per-CPU Flush Queue structure */ @@ -77,6 +79,12 @@ struct iova_domain { iova entry */ struct iova_fq __percpu *fq; /* Flush Queue */ + + atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that + have been started */ + + atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that + have been finished */ }; static inline unsigned long iova_size(struct iova *iova) -- cgit From 8109c2a2f8463852dddd6a1c3fcf262047c0c124 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:31:17 +0200 Subject: iommu/iova: Add locking to Flush-Queues The lock is taken from the same CPU most of the time. But having it allows to flush the queue also from another CPU if necessary. This will be used by a timer to regularily flush any pending IOVAs from the Flush-Queues. Signed-off-by: Joerg Roedel --- include/linux/iova.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 985b8008999e..913a690cd4b0 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -60,6 +60,7 @@ struct iova_fq_entry { struct iova_fq { struct iova_fq_entry entries[IOVA_FQ_SIZE]; unsigned head, tail; + spinlock_t lock; }; /* holds all the iova translations for a domain */ -- cgit From 9a005a800ae817c2c90ef117d7cd77614d866777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 16:58:18 +0200 Subject: iommu/iova: Add flush timer Add a timer to flush entries from the Flush-Queues every 10ms. This makes sure that no stale TLB entries remain for too long after an IOVA has been unmapped. Signed-off-by: Joerg Roedel --- include/linux/iova.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/iova.h b/include/linux/iova.h index 913a690cd4b0..d179b9bf7814 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -48,6 +48,9 @@ typedef void (* iova_entry_dtor)(unsigned long data); /* Number of entries per Flush Queue */ #define IOVA_FQ_SIZE 256 +/* Timeout (in ms) after which entries are flushed from the Flush-Queue */ +#define IOVA_FQ_TIMEOUT 10 + /* Flush Queue entry for defered flushing */ struct iova_fq_entry { unsigned long iova_pfn; @@ -86,6 +89,11 @@ struct iova_domain { atomic64_t fq_flush_finish_cnt; /* Number of TLB flushes that have been finished */ + + struct timer_list fq_timer; /* Timer to regularily empty the + flush-queues */ + atomic_t fq_timer_on; /* 1 when timer is active, 0 + when not */ }; static inline unsigned long iova_size(struct iova *iova) -- cgit From 48ac3c18cc62d4a23d5dc5c59f8720589d0de14b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 14 Jul 2017 12:23:09 +0100 Subject: fork: allow arch-override of VMAP stack alignment In some cases, an architecture might wish its stacks to be aligned to a boundary larger than THREAD_SIZE. For example, using an alignment of double THREAD_SIZE can allow for stack overflows smaller than THREAD_SIZE to be detected by checking a single bit of the stack pointer. This patch allows architectures to override the alignment of VMAP'd stacks, by defining THREAD_ALIGN. Where not defined, this defaults to THREAD_SIZE, as is the case today. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Tested-by: Laura Abbott Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: linux-kernel@vger.kernel.org --- include/linux/thread_info.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 250a27614328..905d769d8ddc 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,6 +38,10 @@ enum { #ifdef __KERNEL__ +#ifndef THREAD_ALIGN +#define THREAD_ALIGN THREAD_SIZE +#endif + #ifdef CONFIG_DEBUG_STACK_USAGE # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ __GFP_ZERO) -- cgit From b8c502b81e3f899c6488967dec61eed0f5907db3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 23:12:46 +0800 Subject: f2fs: fix potential overflow when adjusting GC cycle While comparing signed and unsigned variables, compiler will converts the signed value to unsigned one, due to this reason, {in,de}crease_sleep_time may return overflowed result. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 6f77a2755abb..db5c98256147 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -543,14 +543,14 @@ TRACE_EVENT(f2fs_map_blocks, TRACE_EVENT(f2fs_background_gc, - TP_PROTO(struct super_block *sb, long wait_ms, + TP_PROTO(struct super_block *sb, unsigned int wait_ms, unsigned int prefree, unsigned int free), TP_ARGS(sb, wait_ms, prefree, free), TP_STRUCT__entry( __field(dev_t, dev) - __field(long, wait_ms) + __field(unsigned int, wait_ms) __field(unsigned int, prefree) __field(unsigned int, free) ), @@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_background_gc, __entry->free = free; ), - TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u", + TP_printk("dev = (%d,%d), wait_ms = %u, prefree = %u, free = %u", show_dev(__entry->dev), __entry->wait_ms, __entry->prefree, -- cgit From b352baf15b66c5799018104d38f9eb77c7445a34 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 15 Aug 2017 12:02:16 -0700 Subject: PCI: Move enum pci_interrupt_pin to linux/pci.h We currently have a definition of enum pci_interrupt_pin in a header specific to PCI endpoints - linux/pci-epf.h. In order to allow for use of this enum from PCI host code in a future commit, move its definition to linux/pci.h & include that from linux/pci-epf.h. Additionally we add a PCI_NUM_INTX macro which indicates the number of PCI INTx interrupts, and will be used alongside enum pci_interrupt_pin in further patches. Signed-off-by: Paul Burton [bhelgaas: move enum pci_interrupt_pin outside #ifdef CONFIG_PCI] Signed-off-by: Bjorn Helgaas --- include/linux/pci-epf.h | 9 +-------- include/linux/pci.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 0d529cb90143..bc8750688348 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -14,17 +14,10 @@ #include #include +#include struct pci_epf; -enum pci_interrupt_pin { - PCI_INTERRUPT_UNKNOWN, - PCI_INTERRUPT_INTA, - PCI_INTERRUPT_INTB, - PCI_INTERRUPT_INTC, - PCI_INTERRUPT_INTD, -}; - enum pci_barno { BAR_0, BAR_1, diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..bb9c367c85f0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -102,6 +102,28 @@ enum { DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES, }; +/** + * enum pci_interrupt_pin - PCI INTx interrupt values + * @PCI_INTERRUPT_UNKNOWN: Unknown or unassigned interrupt + * @PCI_INTERRUPT_INTA: PCI INTA pin + * @PCI_INTERRUPT_INTB: PCI INTB pin + * @PCI_INTERRUPT_INTC: PCI INTC pin + * @PCI_INTERRUPT_INTD: PCI INTD pin + * + * Corresponds to values for legacy PCI INTx interrupts, as can be found in the + * PCI_INTERRUPT_PIN register. + */ +enum pci_interrupt_pin { + PCI_INTERRUPT_UNKNOWN, + PCI_INTERRUPT_INTA, + PCI_INTERRUPT_INTB, + PCI_INTERRUPT_INTC, + PCI_INTERRUPT_INTD, +}; + +/* The number of legacy PCI INTx interrupts */ +#define PCI_NUM_INTX 4 + /* * pci_power_t values must match the bits in the Capabilities PME_Support * and Control/Status PowerState fields in the Power Management capability. -- cgit From dc503a8ad98474ea0073a1c5c4d9f18cb8dd0dbf Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 15 Aug 2017 20:34:35 +0100 Subject: bpf/verifier: track liveness for pruning State of a register doesn't matter if it wasn't read in reaching an exit; a write screens off all reads downstream of it from all explored_states upstream of it. This allows us to prune many more branches; here are some processed insn counts for some Cilium programs: Program before after bpf_lb_opt_-DLB_L3.o 6515 3361 bpf_lb_opt_-DLB_L4.o 8976 5176 bpf_lb_opt_-DUNKNOWN.o 2960 1137 bpf_lxc_opt_-DDROP_ALL.o 95412 48537 bpf_lxc_opt_-DUNKNOWN.o 141706 78718 bpf_netdev.o 24251 17995 bpf_overlay.o 10999 9385 The runtime is also improved; here are 'time' results in ms: Program before after bpf_lb_opt_-DLB_L3.o 24 6 bpf_lb_opt_-DLB_L4.o 26 11 bpf_lb_opt_-DUNKNOWN.o 11 2 bpf_lxc_opt_-DDROP_ALL.o 1288 139 bpf_lxc_opt_-DUNKNOWN.o 1768 234 bpf_netdev.o 62 31 bpf_overlay.o 15 13 Signed-off-by: Edward Cree Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c61c3033522e..91d07efed2ba 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -21,6 +21,12 @@ */ #define BPF_MAX_VAR_SIZ INT_MAX +enum bpf_reg_liveness { + REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ + REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ + REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */ +}; + struct bpf_reg_state { enum bpf_reg_type type; union { @@ -40,7 +46,7 @@ struct bpf_reg_state { * came from, when one is tested for != NULL. */ u32 id; - /* These five fields must be last. See states_equal() */ + /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -57,6 +63,8 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* This field must be last, for states_equal() reasons. */ + enum bpf_reg_liveness live; }; enum bpf_stack_slot_type { @@ -74,6 +82,7 @@ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; u8 stack_slot_type[MAX_BPF_STACK]; struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; + struct bpf_verifier_state *parent; }; /* linked list of verifier states used to prune search */ -- cgit From fe4007999599c02598c17b643e8de43e487d48e8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Aug 2017 09:09:49 +0200 Subject: ipv6: fib: Provide offload indication using nexthop flags IPv6 routes currently lack nexthop flags as in IPv4. This has several implications. In the forwarding path, it requires us to check the carrier state of the nexthop device and potentially ignore a linkdown route, instead of checking for RTNH_F_LINKDOWN. It also requires capable drivers to use the user facing IPv6-specific route flags to provide offload indication, instead of using the nexthop flags as in IPv4. Add nexthop flags to IPv6 routes in the 40 bytes hole and use it to provide offload indication instead of the RTF_OFFLOAD flag, which is removed while it's still not part of any official kernel release. In the near future we would like to use the field for the RTNH_F_{LINKDOWN,DEAD} flags, but this change is more involved and might not be ready in time for the current cycle. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ include/uapi/linux/ipv6_route.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1d790ea40ea7..71c1646298ae 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -120,6 +120,8 @@ struct rt6_info { atomic_t rt6i_ref; + unsigned int rt6i_nh_flags; + /* These are in a separate cache line. */ struct rt6key rt6i_dst ____cacheline_aligned_in_smp; u32 rt6i_flags; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 33e2a5732bd1..d496c02e14bc 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -35,7 +35,6 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 -#define RTF_OFFLOAD 0x20000000 /* offloaded route */ #define RTF_PCPU 0x40000000 /* read-only: can not be set by user */ #define RTF_LOCAL 0x80000000 -- cgit From 12d94a804946af291e24b80fc53ec86264765781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 04:09:51 -0700 Subject: ipv6: fix NULL dereference in ip6_route_dev_notify() Based on a syzkaller report [1], I found that a per cpu allocation failure in snmp6_alloc_dev() would then lead to NULL dereference in ip6_route_dev_notify(). It seems this is a very old bug, thus no Fixes tag in this submission. Let's add in6_dev_put_clear() helper, as we will probably use it elsewhere (once available/present in net-next) [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17294 Comm: syz-executor6 Not tainted 4.13.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff88019f456680 task.stack: ffff8801c6e58000 RIP: 0010:__read_once_size include/linux/compiler.h:250 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: 0018:ffff8801c6e5f1b0 EFLAGS: 00010202 RAX: 0000000000000037 RBX: dffffc0000000000 RCX: ffffc90005d25000 RDX: ffff8801c6e5f218 RSI: ffffffff82342bbf RDI: 0000000000000001 RBP: ffff8801c6e5f240 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10038dcbe37 R13: 0000000000000006 R14: 0000000000000001 R15: 00000000000001b8 FS: 00007f21e0429700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001ddbc22000 CR3: 00000001d632b000 CR4: 00000000001426e0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: refcount_dec_and_test+0x1a/0x20 lib/refcount.c:211 in6_dev_put include/net/addrconf.h:335 [inline] ip6_route_dev_notify+0x1c9/0x4a0 net/ipv6/route.c:3732 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1678 call_netdevice_notifiers net/core/dev.c:1694 [inline] rollback_registered_many+0x91c/0xe80 net/core/dev.c:7107 rollback_registered+0x1be/0x3c0 net/core/dev.c:7149 register_netdevice+0xbcd/0xee0 net/core/dev.c:7587 register_netdev+0x1a/0x30 net/core/dev.c:7669 loopback_net_init+0x76/0x160 drivers/net/loopback.c:214 ops_init+0x10a/0x570 net/core/net_namespace.c:118 setup_net+0x313/0x710 net/core/net_namespace.c:294 copy_net_ns+0x27c/0x580 net/core/net_namespace.c:418 create_new_namespaces+0x425/0x880 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:206 SYSC_unshare kernel/fork.c:2347 [inline] SyS_unshare+0x653/0xfa0 kernel/fork.c:2297 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512c9 RSP: 002b:00007f21e0428c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 0000000000718150 RCX: 00000000004512c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000062020200 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b973d R13: 00000000ffffffff R14: 000000002001d000 R15: 00000000000002dd Code: 50 2b 34 82 c7 00 f1 f1 f1 f1 c7 40 04 04 f2 f2 f2 c7 40 08 f3 f3 f3 f3 e8 a1 43 39 ff 4c 89 f8 48 8b 95 70 ff ff ff 48 c1 e8 03 <0f> b6 0c 18 4c 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: __read_once_size include/linux/compiler.h:250 [inline] RSP: ffff8801c6e5f1b0 RIP: atomic_read arch/x86/include/asm/atomic.h:26 [inline] RSP: ffff8801c6e5f1b0 RIP: refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: ffff8801c6e5f1b0 ---[ end trace e441d046c6410d31 ]--- Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6df79e96a780..f44ff2476758 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -336,6 +336,16 @@ static inline void in6_dev_put(struct inet6_dev *idev) in6_dev_finish_destroy(idev); } +static inline void in6_dev_put_clear(struct inet6_dev **pidev) +{ + struct inet6_dev *idev = *pidev; + + if (idev) { + in6_dev_put(idev); + *pidev = NULL; + } +} + static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); -- cgit From b3dc8f772fab5b2d284b780830fd56494491e493 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 15 Aug 2017 04:28:54 -0700 Subject: net: Fix a typo in comment about sock flags. Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index dda2cc939a53..ebeb48c92005 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -37,7 +37,7 @@ struct net; /* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. - * Eventually all flags will be in sk->sk_wq_flags. + * Eventually all flags will be in sk->sk_wq->flags. */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 -- cgit From 808ae8f3c7fefef3aece08820c108b68cdb06e1e Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 22 Mar 2017 19:17:49 +0900 Subject: extcon: Remove deprecated extcon_set/get_cable_state_() The commit 575c2b867ee0 ("extcon: Rename the extcon_set/get_state() to maintain the function naming pattern") renames the extcon function as following: But, the extcon just keeps the old API to prevent the build error. This patch removes the deprecatd extcon API. - extcon_get_cable_state_() -> extcon_get_state() - extcon_set_cable_state_() -> extcon_set_state_sync() Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 7e206a9f88db..3ba02eecba2e 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -422,15 +422,4 @@ static inline int extcon_unregister_interest(struct extcon_specific_cable_nb { return -EINVAL; } - -static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id) -{ - return extcon_get_state(edev, id); -} - -static inline int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id, - bool cable_state) -{ - return extcon_set_state_sync(edev, id, cable_state); -} #endif /* __LINUX_EXTCON_H__ */ -- cgit From 6ab6094f370df082fdf2e117381dd323ce948f68 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 3 Apr 2017 19:45:48 +0900 Subject: extcon: Correct description to improve the readability The extcon files explains the detailed operation for functions and what is meaning of extcon structure. There are different explanation even if the same argument. So, it modifies the description for both functions and structures in order to improve the readability and guide the role of functions more well. Also, this patch fixes the mismatching license info as a GPL v2 and removes the inactive author information. Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 72 ++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 3ba02eecba2e..e03204d5a366 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -1,5 +1,5 @@ /* - * External connector (extcon) class driver + * External Connector (extcon) framework * * Copyright (C) 2015 Samsung Electronics * Author: Chanwoo Choi @@ -20,8 +20,7 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * -*/ + */ #ifndef __LINUX_EXTCON_H__ #define __LINUX_EXTCON_H__ @@ -93,7 +92,7 @@ #define EXTCON_NUM 63 /* - * Define the property of supported external connectors. + * Define the properties of supported external connectors. * * When adding the new extcon property, they *must* have * the type/value/default information. Also, you *have to* @@ -176,44 +175,42 @@ struct extcon_dev; #if IS_ENABLED(CONFIG_EXTCON) -/* - * Following APIs are for notifiers or configurations. - * Notifiers are the external port and connection devices. - */ +/* Following APIs register/unregister the extcon device. */ extern int extcon_dev_register(struct extcon_dev *edev); extern void extcon_dev_unregister(struct extcon_dev *edev); extern int devm_extcon_dev_register(struct device *dev, struct extcon_dev *edev); extern void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev); -extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); -/* - * Following APIs control the memory of extcon device. - */ +/* Following APIs allocate/free the memory of the extcon device. */ extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); extern void extcon_dev_free(struct extcon_dev *edev); extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); +/* Synchronize the state and property value for each external connector. */ +extern int extcon_sync(struct extcon_dev *edev, unsigned int id); + /* - * get/set_state access each bit of the 32b encoded state value. - * They are used to access the status of each cable based on the cable id. + * Following APIs get/set the connected state of each external connector. + * The 'id' argument indicates the defined external connector. */ extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, - bool cable_state); + bool state); extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, - bool cable_state); -/* - * Synchronize the state and property data for a specific external connector. - */ -extern int extcon_sync(struct extcon_dev *edev, unsigned int id); + bool state); /* - * get/set_property access the property value of each external connector. - * They are used to access the property of each cable based on the property id. + * Following APIs get/set the property of each external connector. + * The 'id' argument indicates the defined external connector + * and the 'prop' indicates the extcon property. + * + * And extcon_get/set_property_capability() set the capability of the property + * for each external connector. They are used to set the capability of the + * property of each external connector based on the id and property. */ extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, @@ -224,19 +221,15 @@ extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value prop_val); - -/* - * get/set_property_capability set the capability of the property for each - * external connector. They are used to set the capability of the property - * of each external connector based on the id and property. - */ extern int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); extern int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); /* - * Following APIs are to monitor the status change of the external connectors. + * Following APIs register the notifier block in order to detect + * the change of both state and property value for each external connector. + * * extcon_register_notifier(*edev, id, *nb) : Register a notifier block * for specific external connector of the extcon. * extcon_register_notifier_all(*edev, *nb) : Register a notifier block @@ -265,16 +258,15 @@ extern void devm_extcon_unregister_notifier_all(struct device *dev, struct notifier_block *nb); /* - * Following API get the extcon device from devicetree. - * This function use phandle of devicetree to get extcon device directly. + * Following APIs get the extcon_dev from devicetree or by through extcon name. */ +extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name); extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index); -/* Following API to get information of extcon device */ +/* Following API get the name of extcon device. */ extern const char *extcon_get_edev_name(struct extcon_dev *edev); - #else /* CONFIG_EXTCON */ static inline int extcon_dev_register(struct extcon_dev *edev) { @@ -314,13 +306,13 @@ static inline int extcon_get_state(struct extcon_dev *edev, unsigned int id) } static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, - bool cable_state) + bool state) { return 0; } static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, - bool cable_state) + bool state) { return 0; } @@ -362,11 +354,6 @@ static inline int extcon_set_property_capability(struct extcon_dev *edev, return 0; } -static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) -{ - return NULL; -} - static inline int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb) @@ -392,6 +379,11 @@ static inline void devm_extcon_unregister_notifier(struct device *dev, struct extcon_dev *edev, unsigned int id, struct notifier_block *nb) { } +static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) +{ + return NULL; +} + static inline struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, int index) { -- cgit From ab8a8fbe939de5e2651da6761f83a2b9641b2daa Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 14 Jul 2017 01:00:55 +0900 Subject: extcon: Use tab instead of space for indentation The extcon header file defines the functions which used the mismatched indentation and used the space on some case. So, this patch clean-up the indentation in order to improve the readbility. And this patch changes the return value of extcon_get_extcon_dev() because of maintaing the same value with extcon_get_edev_by_phandle(). Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 51 ++++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index e03204d5a366..744d60ca80c3 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -179,15 +179,15 @@ struct extcon_dev; extern int extcon_dev_register(struct extcon_dev *edev); extern void extcon_dev_unregister(struct extcon_dev *edev); extern int devm_extcon_dev_register(struct device *dev, - struct extcon_dev *edev); + struct extcon_dev *edev); extern void devm_extcon_dev_unregister(struct device *dev, - struct extcon_dev *edev); + struct extcon_dev *edev); /* Following APIs allocate/free the memory of the extcon device. */ extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); extern void extcon_dev_free(struct extcon_dev *edev); extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const unsigned int *cable); + const unsigned int *cable); extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); /* Synchronize the state and property value for each external connector. */ @@ -199,7 +199,7 @@ extern int extcon_sync(struct extcon_dev *edev, unsigned int id); */ extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, - bool state); + bool state); extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, bool state); @@ -236,9 +236,9 @@ extern int extcon_set_property_capability(struct extcon_dev *edev, * for all supported external connectors of the extcon. */ extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, - struct notifier_block *nb); + struct notifier_block *nb); extern int extcon_unregister_notifier(struct extcon_dev *edev, unsigned int id, - struct notifier_block *nb); + struct notifier_block *nb); extern int devm_extcon_register_notifier(struct device *dev, struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); @@ -276,13 +276,13 @@ static inline int extcon_dev_register(struct extcon_dev *edev) static inline void extcon_dev_unregister(struct extcon_dev *edev) { } static inline int devm_extcon_dev_register(struct device *dev, - struct extcon_dev *edev) + struct extcon_dev *edev) { return -EINVAL; } static inline void devm_extcon_dev_unregister(struct device *dev, - struct extcon_dev *edev) { } + struct extcon_dev *edev) { } static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) { @@ -292,7 +292,7 @@ static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) static inline void extcon_dev_free(struct extcon_dev *edev) { } static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const unsigned int *cable) + const unsigned int *cable) { return ERR_PTR(-ENOSYS); } @@ -323,47 +323,45 @@ static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) } static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value *prop_val) + unsigned int prop, + union extcon_property_value *prop_val) { return 0; } static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value prop_val) + unsigned int prop, + union extcon_property_value prop_val) { return 0; } static inline int extcon_set_property_sync(struct extcon_dev *edev, - unsigned int id, unsigned int prop, - union extcon_property_value prop_val) + unsigned int id, unsigned int prop, + union extcon_property_value prop_val) { return 0; } static inline int extcon_get_property_capability(struct extcon_dev *edev, - unsigned int id, unsigned int prop) + unsigned int id, unsigned int prop) { return 0; } static inline int extcon_set_property_capability(struct extcon_dev *edev, - unsigned int id, unsigned int prop) + unsigned int id, unsigned int prop) { return 0; } static inline int extcon_register_notifier(struct extcon_dev *edev, - unsigned int id, - struct notifier_block *nb) + unsigned int id, struct notifier_block *nb) { return 0; } static inline int extcon_unregister_notifier(struct extcon_dev *edev, - unsigned int id, - struct notifier_block *nb) + unsigned int id, struct notifier_block *nb) { return 0; } @@ -381,11 +379,11 @@ static inline void devm_extcon_unregister_notifier(struct device *dev, static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, - int index) + int index) { return ERR_PTR(-ENODEV); } @@ -403,14 +401,13 @@ struct extcon_specific_cable_nb { }; static inline int extcon_register_interest(struct extcon_specific_cable_nb *obj, - const char *extcon_name, const char *cable_name, - struct notifier_block *nb) + const char *extcon_name, const char *cable_name, + struct notifier_block *nb) { return -EINVAL; } -static inline int extcon_unregister_interest(struct extcon_specific_cable_nb - *obj) +static inline int extcon_unregister_interest(struct extcon_specific_cable_nb *obj) { return -EINVAL; } -- cgit From 6a1c9510694fe1e901a3b5b53386eac069adcea6 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Tue, 15 Aug 2017 23:00:20 -0400 Subject: drm/amdkfd: Adding new IOCTL for scratch memory v2 v2: * Renamed ALLOC_MEMORY_OF_SCRATCH to SET_SCRATCH_BACKING_VA * Removed size parameter from the ioctl, it was unused * Removed hole in ioctl number space * No more call to write_config_static_mem * Return correct error code from ioctl Signed-off-by: Moses Reuben Signed-off-by: Ben Goz Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d6833426fdef..1b9c5609d523 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -232,6 +232,12 @@ struct kfd_ioctl_wait_events_args { uint32_t wait_result; /* from KFD */ }; +struct kfd_ioctl_set_scratch_backing_va_args { + uint64_t va_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -286,7 +292,10 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_DBG_WAVE_CONTROL \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x11 +#define AMDKFD_COMMAND_END 0x12 #endif -- cgit From 5d71dbc3a588690c3d66d76db8cd29973425ce6d Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Aug 2017 23:00:22 -0400 Subject: drm/amdkfd: Implement image tiling mode support v2 v2: Removed hole in ioctl number space Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 1b9c5609d523..7b4567bacfc2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -238,6 +238,29 @@ struct kfd_ioctl_set_scratch_backing_va_args { uint32_t pad; }; +struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ + uint64_t tile_config_ptr; + /* to KFD: pointer to macro tile array */ + uint64_t macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + uint32_t num_macro_tile_configs; + + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -295,7 +318,10 @@ struct kfd_ioctl_set_scratch_backing_va_args { #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x12 +#define AMDKFD_COMMAND_END 0x13 #endif -- cgit From 714c53aa2e2d6d60ca1e7d18980767c8b715c288 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Jul 2017 16:58:42 +0200 Subject: clk: renesas: Add r8a77995 CPG Core Clock Definitions Add all R-Car D3 Clock Pulse Generator Core Clock Outputs, as listed in Table 8.2f ("List of Clocks [R-Car D3]") of the R-Car Series, 3rd Generation Hardware User's Manual (Rev. 0.55, Jun. 30, 2017). Note that internal CPG clocks (S0, S1, S2, S3, S1C, S3C, SDSRC, and SSPSRC) are not included, as they are used as internal clock sources only, and never referenced from DT. Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd --- include/dt-bindings/clock/r8a77995-cpg-mssr.h | 57 +++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/dt-bindings/clock/r8a77995-cpg-mssr.h (limited to 'include') diff --git a/include/dt-bindings/clock/r8a77995-cpg-mssr.h b/include/dt-bindings/clock/r8a77995-cpg-mssr.h new file mode 100644 index 000000000000..4e8ae3dee590 --- /dev/null +++ b/include/dt-bindings/clock/r8a77995-cpg-mssr.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2017 Glider bvba + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef __DT_BINDINGS_CLOCK_R8A77995_CPG_MSSR_H__ +#define __DT_BINDINGS_CLOCK_R8A77995_CPG_MSSR_H__ + +#include + +/* r8a77995 CPG Core Clocks */ +#define R8A77995_CLK_Z2 0 +#define R8A77995_CLK_ZG 1 +#define R8A77995_CLK_ZTR 2 +#define R8A77995_CLK_ZT 3 +#define R8A77995_CLK_ZX 4 +#define R8A77995_CLK_S0D1 5 +#define R8A77995_CLK_S1D1 6 +#define R8A77995_CLK_S1D2 7 +#define R8A77995_CLK_S1D4 8 +#define R8A77995_CLK_S2D1 9 +#define R8A77995_CLK_S2D2 10 +#define R8A77995_CLK_S2D4 11 +#define R8A77995_CLK_S3D1 12 +#define R8A77995_CLK_S3D2 13 +#define R8A77995_CLK_S3D4 14 +#define R8A77995_CLK_S1D4C 15 +#define R8A77995_CLK_S3D1C 16 +#define R8A77995_CLK_S3D2C 17 +#define R8A77995_CLK_S3D4C 18 +#define R8A77995_CLK_LB 19 +#define R8A77995_CLK_CL 20 +#define R8A77995_CLK_ZB3 21 +#define R8A77995_CLK_ZB3D2 22 +#define R8A77995_CLK_CR 23 +#define R8A77995_CLK_CRD2 24 +#define R8A77995_CLK_SD0H 25 +#define R8A77995_CLK_SD0 26 +#define R8A77995_CLK_SSP2 27 +#define R8A77995_CLK_SSP1 28 +#define R8A77995_CLK_RPC 29 +#define R8A77995_CLK_RPCD2 30 +#define R8A77995_CLK_ZA2 31 +#define R8A77995_CLK_ZA8 32 +#define R8A77995_CLK_Z2D 33 +#define R8A77995_CLK_CANFD 34 +#define R8A77995_CLK_MSO 35 +#define R8A77995_CLK_R 36 +#define R8A77995_CLK_OSC 37 +#define R8A77995_CLK_LV0 38 +#define R8A77995_CLK_LV1 39 +#define R8A77995_CLK_CP 40 + +#endif /* __DT_BINDINGS_CLOCK_R8A77995_CPG_MSSR_H__ */ -- cgit From 9a35b63728ceb8602c111260044451dd64952500 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 28 Jun 2017 21:56:54 -0600 Subject: btrfs: constify tracepoint arguments Tracepoint arguments are all read-only. If we mark the arguments as const, we're able to keep or convert those arguments to const where appropriate. Signed-off-by: Jeff Mahoney Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 242 +++++++++++++++++++++++-------------------- 1 file changed, 128 insertions(+), 114 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index cd99a3658156..42560feb9920 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -92,7 +92,7 @@ struct btrfs_qgroup; TRACE_EVENT(btrfs_transaction_commit, - TP_PROTO(struct btrfs_root *root), + TP_PROTO(const struct btrfs_root *root), TP_ARGS(root), @@ -113,7 +113,7 @@ TRACE_EVENT(btrfs_transaction_commit, DECLARE_EVENT_CLASS(btrfs__inode, - TP_PROTO(struct inode *inode), + TP_PROTO(const struct inode *inode), TP_ARGS(inode), @@ -151,21 +151,21 @@ DECLARE_EVENT_CLASS(btrfs__inode, DEFINE_EVENT(btrfs__inode, btrfs_inode_new, - TP_PROTO(struct inode *inode), + TP_PROTO(const struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(btrfs__inode, btrfs_inode_request, - TP_PROTO(struct inode *inode), + TP_PROTO(const struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, - TP_PROTO(struct inode *inode), + TP_PROTO(const struct inode *inode), TP_ARGS(inode) ); @@ -192,8 +192,8 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, TRACE_EVENT_CONDITION(btrfs_get_extent, - TP_PROTO(struct btrfs_root *root, struct btrfs_inode *inode, - struct extent_map *map), + TP_PROTO(const struct btrfs_root *root, const struct btrfs_inode *inode, + const struct extent_map *map), TP_ARGS(root, inode, map), @@ -388,7 +388,8 @@ DEFINE_EVENT( DECLARE_EVENT_CLASS(btrfs__ordered_extent, - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + TP_PROTO(const struct inode *inode, + const struct btrfs_ordered_extent *ordered), TP_ARGS(inode, ordered), @@ -440,36 +441,40 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent, DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_add, - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + TP_PROTO(const struct inode *inode, + const struct btrfs_ordered_extent *ordered), TP_ARGS(inode, ordered) ); DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_remove, - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + TP_PROTO(const struct inode *inode, + const struct btrfs_ordered_extent *ordered), TP_ARGS(inode, ordered) ); DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_start, - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + TP_PROTO(const struct inode *inode, + const struct btrfs_ordered_extent *ordered), TP_ARGS(inode, ordered) ); DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put, - TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + TP_PROTO(const struct inode *inode, + const struct btrfs_ordered_extent *ordered), TP_ARGS(inode, ordered) ); DECLARE_EVENT_CLASS(btrfs__writepage, - TP_PROTO(struct page *page, struct inode *inode, - struct writeback_control *wbc), + TP_PROTO(const struct page *page, const struct inode *inode, + const struct writeback_control *wbc), TP_ARGS(page, inode, wbc), @@ -517,15 +522,15 @@ DECLARE_EVENT_CLASS(btrfs__writepage, DEFINE_EVENT(btrfs__writepage, __extent_writepage, - TP_PROTO(struct page *page, struct inode *inode, - struct writeback_control *wbc), + TP_PROTO(const struct page *page, const struct inode *inode, + const struct writeback_control *wbc), TP_ARGS(page, inode, wbc) ); TRACE_EVENT(btrfs_writepage_end_io_hook, - TP_PROTO(struct page *page, u64 start, u64 end, int uptodate), + TP_PROTO(const struct page *page, u64 start, u64 end, int uptodate), TP_ARGS(page, start, end, uptodate), @@ -558,7 +563,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, TRACE_EVENT(btrfs_sync_file, - TP_PROTO(struct file *file, int datasync), + TP_PROTO(const struct file *file, int datasync), TP_ARGS(file, datasync), @@ -570,8 +575,8 @@ TRACE_EVENT(btrfs_sync_file, ), TP_fast_assign( - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = d_inode(dentry); + const struct dentry *dentry = file->f_path.dentry; + const struct inode *inode = d_inode(dentry); TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb)); __entry->ino = inode->i_ino; @@ -589,7 +594,7 @@ TRACE_EVENT(btrfs_sync_file, TRACE_EVENT(btrfs_sync_fs, - TP_PROTO(struct btrfs_fs_info *fs_info, int wait), + TP_PROTO(const struct btrfs_fs_info *fs_info, int wait), TP_ARGS(fs_info, wait), @@ -606,8 +611,8 @@ TRACE_EVENT(btrfs_sync_fs, TRACE_EVENT(btrfs_add_block_group, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, int create), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_block_group_cache *block_group, int create), TP_ARGS(fs_info, block_group, create), @@ -654,9 +659,9 @@ TRACE_EVENT(btrfs_add_block_group, DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_tree_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_tree_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action), @@ -697,9 +702,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref, DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_tree_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_tree_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action) @@ -707,9 +712,9 @@ DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref, DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_tree_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_tree_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action) @@ -717,9 +722,9 @@ DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref, DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_data_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_data_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action), @@ -764,9 +769,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_data_ref, DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_data_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_data_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action) @@ -774,9 +779,9 @@ DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref, DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_data_ref *full_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_data_ref *full_ref, int action), TP_ARGS(fs_info, ref, full_ref, action) @@ -784,9 +789,9 @@ DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_ref_head *head_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_ref_head *head_ref, int action), TP_ARGS(fs_info, ref, head_ref, action), @@ -814,9 +819,9 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_ref_head *head_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_ref_head *head_ref, int action), TP_ARGS(fs_info, ref, head_ref, action) @@ -824,9 +829,9 @@ DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *ref, - struct btrfs_delayed_ref_head *head_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_delayed_ref_node *ref, + const struct btrfs_delayed_ref_head *head_ref, int action), TP_ARGS(fs_info, ref, head_ref, action) @@ -846,8 +851,8 @@ DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, DECLARE_EVENT_CLASS(btrfs__chunk, - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, - u64 offset, u64 size), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct map_lookup *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size), @@ -880,24 +885,24 @@ DECLARE_EVENT_CLASS(btrfs__chunk, DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, - u64 offset, u64 size), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct map_lookup *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size) ); DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free, - TP_PROTO(struct btrfs_fs_info *fs_info, struct map_lookup *map, - u64 offset, u64 size), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct map_lookup *map, u64 offset, u64 size), TP_ARGS(fs_info, map, offset, size) ); TRACE_EVENT(btrfs_cow_block, - TP_PROTO(struct btrfs_root *root, struct extent_buffer *buf, - struct extent_buffer *cow), + TP_PROTO(const struct btrfs_root *root, const struct extent_buffer *buf, + const struct extent_buffer *cow), TP_ARGS(root, buf, cow), @@ -931,7 +936,7 @@ TRACE_EVENT(btrfs_cow_block, TRACE_EVENT(btrfs_space_reservation, - TP_PROTO(struct btrfs_fs_info *fs_info, char *type, u64 val, + TP_PROTO(const struct btrfs_fs_info *fs_info, char *type, u64 val, u64 bytes, int reserve), TP_ARGS(fs_info, type, val, bytes, reserve), @@ -963,7 +968,7 @@ TRACE_EVENT(btrfs_space_reservation, TRACE_EVENT(btrfs_trigger_flush, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 bytes, + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes, int flush, char *reason), TP_ARGS(fs_info, flags, bytes, flush, reason), @@ -1004,7 +1009,7 @@ TRACE_EVENT(btrfs_trigger_flush, TRACE_EVENT(btrfs_flush_space, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, u64 orig_bytes, int state, int ret), TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret), @@ -1039,7 +1044,7 @@ TRACE_EVENT(btrfs_flush_space, DECLARE_EVENT_CLASS(btrfs__reserved_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), TP_ARGS(fs_info, start, len), @@ -1061,22 +1066,22 @@ DECLARE_EVENT_CLASS(btrfs__reserved_extent, DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_alloc, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), TP_ARGS(fs_info, start, len) ); DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 start, u64 len), + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 start, u64 len), TP_ARGS(fs_info, start, len) ); TRACE_EVENT(find_free_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 empty_size, - u64 data), + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 num_bytes, + u64 empty_size, u64 data), TP_ARGS(fs_info, num_bytes, empty_size, data), @@ -1101,8 +1106,8 @@ TRACE_EVENT(find_free_extent, DECLARE_EVENT_CLASS(btrfs__reserve_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, u64 start, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_block_group_cache *block_group, u64 start, u64 len), TP_ARGS(fs_info, block_group, start, len), @@ -1132,8 +1137,8 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent, DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, u64 start, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_block_group_cache *block_group, u64 start, u64 len), TP_ARGS(fs_info, block_group, start, len) @@ -1141,8 +1146,8 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent, DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *block_group, u64 start, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_block_group_cache *block_group, u64 start, u64 len), TP_ARGS(fs_info, block_group, start, len) @@ -1150,7 +1155,7 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster, TRACE_EVENT(btrfs_find_cluster, - TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start, + TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start, u64 bytes, u64 empty_size, u64 min_bytes), TP_ARGS(block_group, start, bytes, empty_size, min_bytes), @@ -1183,7 +1188,7 @@ TRACE_EVENT(btrfs_find_cluster, TRACE_EVENT(btrfs_failed_cluster_setup, - TP_PROTO(struct btrfs_block_group_cache *block_group), + TP_PROTO(const struct btrfs_block_group_cache *block_group), TP_ARGS(block_group), @@ -1200,8 +1205,9 @@ TRACE_EVENT(btrfs_failed_cluster_setup, TRACE_EVENT(btrfs_setup_cluster, - TP_PROTO(struct btrfs_block_group_cache *block_group, - struct btrfs_free_cluster *cluster, u64 size, int bitmap), + TP_PROTO(const struct btrfs_block_group_cache *block_group, + const struct btrfs_free_cluster *cluster, + u64 size, int bitmap), TP_ARGS(block_group, cluster, size, bitmap), @@ -1235,12 +1241,13 @@ TRACE_EVENT(btrfs_setup_cluster, struct extent_state; TRACE_EVENT(alloc_extent_state, - TP_PROTO(struct extent_state *state, gfp_t mask, unsigned long IP), + TP_PROTO(const struct extent_state *state, + gfp_t mask, unsigned long IP), TP_ARGS(state, mask, IP), TP_STRUCT__entry( - __field(struct extent_state *, state) + __field(const struct extent_state *, state) __field(gfp_t, mask) __field(unsigned long, ip) ), @@ -1252,17 +1259,17 @@ TRACE_EVENT(alloc_extent_state, ), TP_printk("state=%p mask=%s caller=%pS", __entry->state, - show_gfp_flags(__entry->mask), (void *)__entry->ip) + show_gfp_flags(__entry->mask), (const void *)__entry->ip) ); TRACE_EVENT(free_extent_state, - TP_PROTO(struct extent_state *state, unsigned long IP), + TP_PROTO(const struct extent_state *state, unsigned long IP), TP_ARGS(state, IP), TP_STRUCT__entry( - __field(struct extent_state *, state) + __field(const struct extent_state *, state) __field(unsigned long, ip) ), @@ -1272,22 +1279,22 @@ TRACE_EVENT(free_extent_state, ), TP_printk("state=%p caller=%pS", __entry->state, - (void *)__entry->ip) + (const void *)__entry->ip) ); DECLARE_EVENT_CLASS(btrfs__work, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(const struct btrfs_work *work), TP_ARGS(work), TP_STRUCT__entry_btrfs( - __field( void *, work ) - __field( void *, wq ) - __field( void *, func ) - __field( void *, ordered_func ) - __field( void *, ordered_free ) - __field( void *, normal_work ) + __field( const void *, work ) + __field( const void *, wq ) + __field( const void *, func ) + __field( const void *, ordered_func ) + __field( const void *, ordered_free ) + __field( const void *, normal_work ) ), TP_fast_assign_btrfs(btrfs_work_owner(work), @@ -1312,12 +1319,12 @@ DECLARE_EVENT_CLASS(btrfs__work, */ DECLARE_EVENT_CLASS(btrfs__work__done, - TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), + TP_PROTO(const struct btrfs_fs_info *fs_info, const void *wtag), TP_ARGS(fs_info, wtag), TP_STRUCT__entry_btrfs( - __field( void *, wtag ) + __field( const void *, wtag ) ), TP_fast_assign_btrfs(fs_info, @@ -1329,40 +1336,41 @@ DECLARE_EVENT_CLASS(btrfs__work__done, DEFINE_EVENT(btrfs__work, btrfs_work_queued, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(const struct btrfs_work *work), TP_ARGS(work) ); DEFINE_EVENT(btrfs__work, btrfs_work_sched, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(const struct btrfs_work *work), TP_ARGS(work) ); DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, - TP_PROTO(struct btrfs_fs_info *fs_info, void *wtag), + TP_PROTO(const struct btrfs_fs_info *fs_info, const void *wtag), TP_ARGS(fs_info, wtag) ); DEFINE_EVENT(btrfs__work, btrfs_ordered_sched, - TP_PROTO(struct btrfs_work *work), + TP_PROTO(const struct btrfs_work *work), TP_ARGS(work) ); DECLARE_EVENT_CLASS(btrfs__workqueue, - TP_PROTO(struct __btrfs_workqueue *wq, const char *name, int high), + TP_PROTO(const struct __btrfs_workqueue *wq, + const char *name, int high), TP_ARGS(wq, name, high), TP_STRUCT__entry_btrfs( - __field( void *, wq ) + __field( const void *, wq ) __string( name, name ) __field( int , high ) ), @@ -1381,19 +1389,20 @@ DECLARE_EVENT_CLASS(btrfs__workqueue, DEFINE_EVENT(btrfs__workqueue, btrfs_workqueue_alloc, - TP_PROTO(struct __btrfs_workqueue *wq, const char *name, int high), + TP_PROTO(const struct __btrfs_workqueue *wq, + const char *name, int high), TP_ARGS(wq, name, high) ); DECLARE_EVENT_CLASS(btrfs__workqueue_done, - TP_PROTO(struct __btrfs_workqueue *wq), + TP_PROTO(const struct __btrfs_workqueue *wq), TP_ARGS(wq), TP_STRUCT__entry_btrfs( - __field( void *, wq ) + __field( const void *, wq ) ), TP_fast_assign_btrfs(btrfs_workqueue_owner(wq), @@ -1405,7 +1414,7 @@ DECLARE_EVENT_CLASS(btrfs__workqueue_done, DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, - TP_PROTO(struct __btrfs_workqueue *wq), + TP_PROTO(const struct __btrfs_workqueue *wq), TP_ARGS(wq) ); @@ -1417,7 +1426,8 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), + TP_PROTO(const struct inode *inode, u64 start, u64 len, + u64 reserved, int op), TP_ARGS(inode, start, len, reserved, op), @@ -1449,21 +1459,24 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_rsv_data, DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_reserve_data, - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), + TP_PROTO(const struct inode *inode, u64 start, u64 len, + u64 reserved, int op), TP_ARGS(inode, start, len, reserved, op) ); DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data, - TP_PROTO(struct inode *inode, u64 start, u64 len, u64 reserved, int op), + TP_PROTO(const struct inode *inode, u64 start, u64 len, + u64 reserved, int op), TP_ARGS(inode, start, len, reserved, op) ); DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved), + TP_PROTO(const struct btrfs_fs_info *fs_info, + u64 ref_root, u64 reserved), TP_ARGS(fs_info, ref_root, reserved), @@ -1483,14 +1496,15 @@ DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref, DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 ref_root, u64 reserved), + TP_PROTO(const struct btrfs_fs_info *fs_info, + u64 ref_root, u64 reserved), TP_ARGS(fs_info, ref_root, reserved) ); DECLARE_EVENT_CLASS(btrfs_qgroup_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_extent_record *rec), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup_extent_record *rec), TP_ARGS(fs_info, rec), @@ -1511,23 +1525,23 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent, DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_extent_record *rec), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup_extent_record *rec), TP_ARGS(fs_info, rec) ); DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup_extent_record *rec), + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup_extent_record *rec), TP_ARGS(fs_info, rec) ); TRACE_EVENT(btrfs_qgroup_account_extent, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 bytenr, + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots), TP_ARGS(fs_info, bytenr, num_bytes, nr_old_roots, nr_new_roots), @@ -1556,7 +1570,7 @@ TRACE_EVENT(btrfs_qgroup_account_extent, TRACE_EVENT(qgroup_update_counters, - TP_PROTO(struct btrfs_fs_info *fs_info, u64 qgid, + TP_PROTO(const struct btrfs_fs_info *fs_info, u64 qgid, u64 cur_old_count, u64 cur_new_count), TP_ARGS(fs_info, qgid, cur_old_count, cur_new_count), -- cgit From 00142756e1f8015d2f8ce96532d156689db7e448 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 12 Jul 2017 16:20:08 -0600 Subject: btrfs: backref, add tracepoints for prelim_ref insertion and merging This patch adds a tracepoint event for prelim_ref insertion and merging. For each, the ref being inserted or merged and the count of tree nodes is issued. Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 42560feb9920..90d25085762f 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -26,6 +26,7 @@ struct btrfs_work; struct __btrfs_workqueue; struct btrfs_qgroup_extent_record; struct btrfs_qgroup; +struct prelim_ref; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -1636,6 +1637,63 @@ TRACE_EVENT(qgroup_meta_reserve, show_root_type(__entry->refroot), __entry->diff) ); +DECLARE_EVENT_CLASS(btrfs__prelim_ref, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct prelim_ref *oldref, + const struct prelim_ref *newref, u64 tree_size), + TP_ARGS(fs_info, newref, oldref, tree_size), + + TP_STRUCT__entry_btrfs( + __field( u64, root_id ) + __field( u64, objectid ) + __field( u8, type ) + __field( u64, offset ) + __field( int, level ) + __field( int, old_count ) + __field( u64, parent ) + __field( u64, bytenr ) + __field( int, mod_count ) + __field( u64, tree_size ) + ), + + TP_fast_assign_btrfs(fs_info, + __entry->root_id = oldref->root_id; + __entry->objectid = oldref->key_for_search.objectid; + __entry->type = oldref->key_for_search.type; + __entry->offset = oldref->key_for_search.offset; + __entry->level = oldref->level; + __entry->old_count = oldref->count; + __entry->parent = oldref->parent; + __entry->bytenr = oldref->wanted_disk_byte; + __entry->mod_count = newref ? newref->count : 0; + __entry->tree_size = tree_size; + ), + + TP_printk_btrfs("root_id=%llu key=[%llu,%u,%llu] level=%d count=[%d+%d=%d] parent=%llu wanted_disk_byte=%llu nodes=%llu", + (unsigned long long)__entry->root_id, + (unsigned long long)__entry->objectid, __entry->type, + (unsigned long long)__entry->offset, __entry->level, + __entry->old_count, __entry->mod_count, + __entry->old_count + __entry->mod_count, + (unsigned long long)__entry->parent, + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->tree_size) +); + +DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_merge, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct prelim_ref *oldref, + const struct prelim_ref *newref, u64 tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size) +); + +DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert, + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct prelim_ref *oldref, + const struct prelim_ref *newref, u64 tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit From 7bdd6277e0dc2beb4f5db5ea4ff7670ecf0b5879 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 11 Jul 2017 13:25:13 +0300 Subject: btrfs: Remove redundant argument of flush_space All callers of flush_space pass the same number for orig/num_bytes arguments. Let's remove one of the numbers and also modify the trace point to show only a single number - bytes requested. Seems that last point where the two parameters were treated differently is before the ticketed enospc rework. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 90d25085762f..1e4908dcd065 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1011,15 +1011,14 @@ TRACE_EVENT(btrfs_trigger_flush, TRACE_EVENT(btrfs_flush_space, TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, - u64 orig_bytes, int state, int ret), + int state, int ret), - TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret), + TP_ARGS(fs_info, flags, num_bytes, state, ret), TP_STRUCT__entry( __array( u8, fsid, BTRFS_UUID_SIZE ) __field( u64, flags ) __field( u64, num_bytes ) - __field( u64, orig_bytes ) __field( int, state ) __field( int, ret ) ), @@ -1028,19 +1027,17 @@ TRACE_EVENT(btrfs_flush_space, memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); __entry->flags = flags; __entry->num_bytes = num_bytes; - __entry->orig_bytes = orig_bytes; __entry->state = state; __entry->ret = ret; ), - TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu " - "orig_bytes=%llu ret=%d", __entry->fsid, __entry->state, + TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d", + __entry->fsid, __entry->state, show_flush_state(__entry->state), (unsigned long long)__entry->flags, __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS), - (unsigned long long)__entry->num_bytes, - (unsigned long long)__entry->orig_bytes, __entry->ret) + (unsigned long long)__entry->num_bytes, __entry->ret) ); DECLARE_EVENT_CLASS(btrfs__reserved_extent, -- cgit From b19af510e67e6ca696b8721f45c148119437307c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Aug 2017 16:53:16 +0200 Subject: genirq/irq_sim: Add a simple interrupt simulator framework Implement a simple, irq_work-based framework for simulating interrupts. Currently the API exposes routines for initializing and deinitializing the simulator object, enqueueing the interrupts and retrieving the allocated interrupt numbers based on the offset of the dummy interrupt in the simulator struct. Signed-off-by: Bartosz Golaszewski Reviewed-by: Jonathan Cameron Cc: Lars-Peter Clausen Cc: Jonathan Corbet Cc: Marc Zyngier Cc: Linus Walleij Cc: linux-doc@vger.kernel.org Cc: linux-gpio@vger.kernel.org Cc: Bamvor Jian Zhang Cc: Jonathan Cameron Link: http://lkml.kernel.org/r/20170814145318.6495-2-brgl@bgdev.pl Signed-off-by: Thomas Gleixner --- include/linux/irq_sim.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/irq_sim.h (limited to 'include') diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h new file mode 100644 index 000000000000..39ce57bfa995 --- /dev/null +++ b/include/linux/irq_sim.h @@ -0,0 +1,41 @@ +#ifndef _LINUX_IRQ_SIM_H +#define _LINUX_IRQ_SIM_H +/* + * Copyright (C) 2017 Bartosz Golaszewski + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +/* + * Provides a framework for allocating simulated interrupts which can be + * requested like normal irqs and enqueued from process context. + */ + +struct irq_sim_work_ctx { + struct irq_work work; + int irq; +}; + +struct irq_sim_irq_ctx { + int irqnum; + bool enabled; +}; + +struct irq_sim { + struct irq_sim_work_ctx work_ctx; + int irq_base; + unsigned int irq_count; + struct irq_sim_irq_ctx *irqs; +}; + +int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs); +void irq_sim_fini(struct irq_sim *sim); +void irq_sim_fire(struct irq_sim *sim, unsigned int offset); +int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset); + +#endif /* _LINUX_IRQ_SIM_H */ -- cgit From 44e72c7ebf294043cfe276f7328b8c0e6a3e50e9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Aug 2017 16:53:17 +0200 Subject: genirq/irq_sim: Add a devres variant of irq_sim_init() Add a resource managed version of irq_sim_init(). This can be conveniently used in device drivers. Signed-off-by: Bartosz Golaszewski Acked-by: Jonathan Cameron Cc: Lars-Peter Clausen Cc: Jonathan Corbet Cc: Marc Zyngier Cc: Linus Walleij Cc: linux-doc@vger.kernel.org Cc: linux-gpio@vger.kernel.org Cc: Bamvor Jian Zhang Cc: Jonathan Cameron Link: http://lkml.kernel.org/r/20170814145318.6495-3-brgl@bgdev.pl Signed-off-by: Thomas Gleixner --- include/linux/irq_sim.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h index 39ce57bfa995..0380d899b955 100644 --- a/include/linux/irq_sim.h +++ b/include/linux/irq_sim.h @@ -10,6 +10,7 @@ */ #include +#include /* * Provides a framework for allocating simulated interrupts which can be @@ -34,6 +35,8 @@ struct irq_sim { }; int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs); +int devm_irq_sim_init(struct device *dev, struct irq_sim *sim, + unsigned int num_irqs); void irq_sim_fini(struct irq_sim *sim); void irq_sim_fire(struct irq_sim *sim, unsigned int offset); int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset); -- cgit From 6f3d791f300618caf82a2be0c27456edd76d5164 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 11 Aug 2017 10:03:59 -0700 Subject: Drivers: hv: vmbus: Fix rescind handling issues This patch handles the following issues that were observed when we are handling racing channel offer message and rescind message for the same offer: 1. Since the host does not respond to messages on a rescinded channel, in the current code, we could be indefinitely blocked on the vmbus_open() call. 2. When a rescinded channel is being closed, if there is a pending interrupt on the channel, we could end up freeing the channel that the interrupt handler would run on. Signed-off-by: K. Y. Srinivasan Reviewed-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 27db4e650b8c..07650d0232cc 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -879,6 +879,8 @@ struct vmbus_channel { */ enum hv_numa_policy affinity_policy; + bool probe_done; + }; static inline bool is_hvsock_channel(const struct vmbus_channel *c) -- cgit From 0d58e6c1b19b30623b5f0a053818bd2c32d61166 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 15 Aug 2017 12:02:17 -0700 Subject: PCI: Add pci_irqd_intx_xlate() Legacy PCI INTx interrupts are represented in the PCI_INTERRUPT_PIN register using the range 1-4, which matches our enum pci_interrupt_pin. This is however not ideal for an IRQ domain, where with 4 interrupts we would ideally have a domain of size 4 & hwirq numbers in the range 0-3. Different PCI host controller drivers have handled this in different ways. Of those under drivers/pci/ which register an INTx IRQ domain, we have: - pcie-altera uses the range 1-4 in device trees and an IRQ domain of size 5 to cover that range, with entry 0 wasted. - pcie-xilinx & pcie-xilinx-nwl use the range 1-4 in device trees but register an IRQ domain of size 4, which doesn't cover the hwirq=4/INTD case leading to that interrupt being broken. - pci-ftpci100 & pci-aardvark use the range 0-3 in both device trees & as hwirq numbering in the driver & IRQ domain. In order to introduce some level of consistency in at least the hwirq numbering used by the drivers & IRQ domains, this patch introduces a new pci_irqd_intx_xlate() helper function which drivers using the 1-4 range in device trees can assign as the xlate callback for their INTx IRQ domain. This translates the 1-4 range into a 0-3 range, allowing us to use an IRQ domain of size 4 & avoid a wasted entry. Further patches will make use of this in drivers to allow them to use an IRQ domain of size 4 for legacy INTx interrupts without breaking INTD. Signed-off-by: Paul Burton Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index bb9c367c85f0..bbc2a991b63f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1416,6 +1416,38 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, NULL); } +/** + * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq + * @d: the INTx IRQ domain + * @node: the DT node for the device whose interrupt we're translating + * @intspec: the interrupt specifier data from the DT + * @intsize: the number of entries in @intspec + * @out_hwirq: pointer at which to write the hwirq number + * @out_type: pointer at which to write the interrupt type + * + * Translate a PCI INTx interrupt number from device tree in the range 1-4, as + * stored in the standard PCI_INTERRUPT_PIN register, to a value in the range + * 0-3 suitable for use in a 4 entry IRQ domain. That is, subtract one from the + * INTx value to obtain the hwirq number. + * + * Returns 0 on success, or -EINVAL if the interrupt specifier is out of range. + */ +static inline int pci_irqd_intx_xlate(struct irq_domain *d, + struct device_node *node, + const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + const u32 intx = intspec[0]; + + if (intx < PCI_INTERRUPT_INTA || intx > PCI_INTERRUPT_INTD) + return -EINVAL; + + *out_hwirq = intx - PCI_INTERRUPT_INTA; + return 0; +} + #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_auto; -- cgit From b005fd189cec9407b700599e1e80e0552446ee79 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:31:58 -0700 Subject: bpf: introduce new program type for skbs on sockets A class of programs, run from strparser and soon from a new map type called sock map, are used with skb as the context but on established sockets. By creating a specific program type for these we can use bpf helpers that expect full sockets and get the verifier to ensure these helpers are not used out of context. The new type is BPF_PROG_TYPE_SK_SKB. This patch introduces the infrastructure and type. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index b1e1035ca24b..4b72db30dacf 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -11,6 +11,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) #endif #ifdef CONFIG_BPF_EVENTS BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 91da8371a2d0..2e796e384aeb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -127,6 +127,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, }; enum bpf_attach_type { -- cgit From a6f6df69c48b86cd84f36c70593eb4968fceb34a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:32:22 -0700 Subject: bpf: export bpf_prog_inc_not_zero bpf_prog_inc_not_zero will be used by upcoming sockmap patches this patch simply exports it so we can pull it in. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 39229c455cba..d6e1de8ce0fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -252,6 +252,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); +struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); int __bpf_prog_charge(struct user_struct *user, u32 pages); void __bpf_prog_uncharge(struct user_struct *user, u32 pages); @@ -344,6 +345,12 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *__must_check +bpf_prog_inc_not_zero(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) { return 0; -- cgit From 174a79ff9515f400b9a6115643dafd62a635b7e6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:32:47 -0700 Subject: bpf: sockmap with sk redirect support Recently we added a new map type called dev map used to forward XDP packets between ports (6093ec2dc313). This patches introduces a similar notion for sockets. A sockmap allows users to add participating sockets to a map. When sockets are added to the map enough context is stored with the map entry to use the entry with a new helper bpf_sk_redirect_map(map, key, flags) This helper (analogous to bpf_redirect_map in XDP) is given the map and an entry in the map. When called from a sockmap program, discussed below, the skb will be sent on the socket using skb_send_sock(). With the above we need a bpf program to call the helper from that will then implement the send logic. The initial site implemented in this series is the recv_sock hook. For this to work we implemented a map attach command to add attributes to a map. In sockmap we add two programs a parse program and a verdict program. The parse program uses strparser to build messages and pass them to the verdict program. The parse programs use the normal strparser semantics. The verdict program is of type SK_SKB. The verdict program returns a verdict SK_DROP, or SK_REDIRECT for now. Additional actions may be added later. When SK_REDIRECT is returned, expected when bpf program uses bpf_sk_redirect_map(), the sockmap logic will consult per cpu variables set by the helper routine and pull the sock entry out of the sock map. This pattern follows the existing redirect logic in cls and xdp programs. This gives the flow, recv_sock -> str_parser (parse_prog) -> verdict_prog -> skb_send_sock \ -> kfree_skb As an example use case a message based load balancer may use specific logic in the verdict program to select the sock to send on. Sample programs are provided in future patches that hopefully illustrate the user interfaces. Also selftests are in follow-on patches. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++-- include/linux/bpf_types.h | 1 + include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d6e1de8ce0fc..a4145e9c74b5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include struct perf_event; +struct bpf_prog; struct bpf_map; /* map is generic key/value storage optionally accesible by eBPF programs */ @@ -37,6 +38,8 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); + int (*map_attach)(struct bpf_map *map, + struct bpf_prog *p1, struct bpf_prog *p2); }; struct bpf_map { @@ -138,8 +141,6 @@ enum bpf_reg_type { PTR_TO_PACKET_END, /* skb->data + headlen */ }; -struct bpf_prog; - /* The information passed from prog-specific *_is_valid_access * back to the verifier. */ @@ -312,6 +313,7 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); @@ -391,6 +393,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; +extern const struct bpf_func_proto bpf_sock_map_update_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 4b72db30dacf..fa805074d168 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -38,4 +38,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index d19ed3c15e1e..7015116331af 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -727,6 +727,8 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); +struct sock *do_sk_redirect_map(void); + #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e796e384aeb..7f774769e3f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -110,6 +110,7 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, }; enum bpf_prog_type { @@ -135,11 +136,15 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_SMAP_INGRESS, __MAX_BPF_ATTACH_TYPE }; #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */ +#define BPF_SOCKMAP_STRPARSER (1U << 0) + /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -211,6 +216,7 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -557,6 +563,23 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_sk_redirect_map(map, key, flags) + * Redirect skb to a sock in map using key as a lookup key for the + * sock in map. + * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_REDIRECT + * + * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * @skops: pointer to bpf_sock_ops + * @map: pointer to sockmap to update + * @key: key to insert/update sock in map + * @flags: same flags as map update elem + * @map_flags: sock map specific flags + * bit 1: Enable strparser + * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -610,7 +633,9 @@ union bpf_attr { FN(set_hash), \ FN(setsockopt), \ FN(skb_adjust_room), \ - FN(redirect_map), + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -747,6 +772,12 @@ struct xdp_md { __u32 data_end; }; +enum sk_action { + SK_ABORTED = 0, + SK_DROP, + SK_REDIRECT, +}; + #define BPF_TAG_SIZE 8 struct bpf_prog_info { -- cgit From 8a31db5615667956c513d205cfb06885c3ec6d0b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 15 Aug 2017 22:33:09 -0700 Subject: bpf: add access to sock fields and pkt data from sk_skb programs Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7f774769e3f5..5ecbe812a2cc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -712,6 +712,15 @@ struct __sk_buff { __u32 data; __u32 data_end; __u32 napi_id; + + /* accessed by BPF_PROG_TYPE_sk_skb types */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; struct bpf_tunnel_key { -- cgit From 729749bb8da186e68d97d1b0439f0b1e0059c41d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Aug 2017 10:03:59 -0400 Subject: SUNRPC: Don't hold the transport lock across socket copy operations Instead add a mechanism to ensure that the request doesn't disappear from underneath us while copying from the socket. We do this by preventing xprt_release() from freeing the XDR buffers until the flag RPC_TASK_MSG_RECV has been cleared from the request. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever --- include/linux/sunrpc/sched.h | 2 ++ include/linux/sunrpc/xprt.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 50a99a117da7..c1768f9d993b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -139,6 +139,8 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_ACTIVE 2 +#define RPC_TASK_MSG_RECV 3 +#define RPC_TASK_MSG_RECV_WAIT 4 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eab1c749e192..65b9e0224753 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -372,6 +372,8 @@ void xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_pin_rqst(struct rpc_rqst *req); +void xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); -- cgit From e543002f77f463501d47fab43acf7ba881e9dcaf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 15 Aug 2017 21:11:03 +0200 Subject: qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs The main purpose of this tracepoint is to monitor bulk dequeue in the network qdisc layer, as it cannot be deducted from the existing qdisc stats. The txq_state can be used for determining the reason for zero packet dequeues, see enum netdev_queue_state_t. Notice all packets doesn't necessary activate this tracepoint. As qdiscs with flag TCQ_F_CAN_BYPASS, can directly invoke sch_direct_xmit() when qdisc_qlen is zero. Remember that perf record supports filters like: perf record -e qdisc:qdisc_dequeue \ --filter 'ifindex == 4 && (packets > 1 || txq_state > 0)' Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/trace/events/qdisc.h (limited to 'include') diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h new file mode 100644 index 000000000000..60d0d8bd336d --- /dev/null +++ b/include/trace/events/qdisc.h @@ -0,0 +1,50 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM qdisc + +#if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_QDISC_H_ + +#include +#include +#include +#include + +TRACE_EVENT(qdisc_dequeue, + + TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq, + int packets, struct sk_buff *skb), + + TP_ARGS(qdisc, txq, packets, skb), + + TP_STRUCT__entry( + __field( struct Qdisc *, qdisc ) + __field(const struct netdev_queue *, txq ) + __field( int, packets ) + __field( void *, skbaddr ) + __field( int, ifindex ) + __field( u32, handle ) + __field( u32, parent ) + __field( unsigned long, txq_state) + ), + + /* skb==NULL indicate packets dequeued was 0, even when packets==1 */ + TP_fast_assign( + __entry->qdisc = qdisc; + __entry->txq = txq; + __entry->packets = skb ? packets : 0; + __entry->skbaddr = skb; + __entry->ifindex = txq->dev ? txq->dev->ifindex : 0; + __entry->handle = qdisc->handle; + __entry->parent = qdisc->parent; + __entry->txq_state = txq->state; + ), + + TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p", + __entry->ifindex, __entry->handle, __entry->parent, + __entry->txq_state, __entry->packets, __entry->skbaddr ) +); + +#endif /* _TRACE_QDISC_H_ */ + +/* This part must be outside protection */ +#include -- cgit From 6bdc9c4c31c81688e19cb186d49be01bbb6a1618 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 16 Aug 2017 15:02:32 -0700 Subject: bpf: sock_map fixes for !CONFIG_BPF_SYSCALL and !STREAM_PARSER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve issues with !CONFIG_BPF_SYSCALL and !STREAM_PARSER net/core/filter.c: In function ‘do_sk_redirect_map’: net/core/filter.c:1881:3: error: implicit declaration of function ‘__sock_map_lookup_elem’ [-Werror=implicit-function-declaration] sk = __sock_map_lookup_elem(ri->map, ri->ifindex); ^ net/core/filter.c:1881:6: warning: assignment makes pointer from integer without a cast [enabled by default] sk = __sock_map_lookup_elem(ri->map, ri->ifindex); Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Reported-by: Eric Dumazet Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 +++++++++- include/linux/bpf_types.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a4145e9c74b5..1cc6c5ff61ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -313,7 +313,6 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); /* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); @@ -377,6 +376,15 @@ static inline void __dev_map_flush(struct bpf_map *map) } #endif /* CONFIG_BPF_SYSCALL */ +#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) +struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +#else +static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) +{ + return NULL; +} +#endif + /* verifier prototypes for helper functions called from eBPF programs */ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fa805074d168..6f1a567667b8 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -38,5 +38,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +#ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +#endif -- cgit From 9a603b8e1136f2b55f780fefbcbf84d31844ff2b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:24 -0700 Subject: vmbus: remove unused vmbus_sendpacket_multipagebuffer This function is not used anywhere in current code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b7d7bbec74e0..39a080ce17da 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1052,12 +1052,6 @@ extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, u64 requestid, u32 flags); -extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, - struct hv_multipage_buffer *mpb, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit From 5a668d8cddbe8bf14379ce110c49ca088a1e9fae Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:25 -0700 Subject: vmbus: remove unused vmubs_sendpacket_pagebuffer_ctl The function vmbus_sendpacket_pagebuffer_ctl was never used directly. Just have vmbus_send_pagebuffer Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 39a080ce17da..9692592d43a3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1044,14 +1044,6 @@ extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, u32 bufferlen, u64 requestid); -extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid, - u32 flags); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, -- cgit From 5dd0fb9b9ffc0ef9b312d05604f4ad0fffc50505 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 16 Aug 2017 08:56:26 -0700 Subject: vmbus: remove unused vmbus_sendpacket_ctl The only usage of vmbus_sendpacket_ctl was by vmbus_sendpacket. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 9692592d43a3..a5f961c4149e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1030,13 +1030,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel, - void *buffer, - u32 bufferLen, - u64 requestid, - enum vmbus_packet_type type, - u32 flags); - extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], u32 pagecount, -- cgit From 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 10:36:47 -0700 Subject: ptr_ring: use kmalloc_array() As found by syzkaller, malicious users can set whatever tx_queue_len on a tun device and eventually crash the kernel. Lets remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small ring buffer is not fast anyway. Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Michael S. Tsirkin Cc: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 9 +++++---- include/linux/skb_array.h | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index d8c97ec8a8e6..37b4bb2545b3 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -436,9 +436,9 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, __PTR_RING_PEEK_CALL_v; \ }) -static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) { - return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); + return kcalloc(size, sizeof(void *), gfp); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -582,7 +582,8 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, + unsigned int nrings, int size, gfp_t gfp, void (*destroy)(void *)) { @@ -590,7 +591,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, void ***queues; int i; - queues = kmalloc(nrings * sizeof *queues, gfp); + queues = kmalloc_array(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 35226cd4efb0..8621ffdeecbf 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -193,7 +193,8 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) } static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, int size, gfp_t gfp) + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); return ptr_ring_resize_multiple((struct ptr_ring **)rings, -- cgit From c780a049f9bf442314335372c9abc4548bfe3e44 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2017 11:09:12 -0700 Subject: ipv4: better IP_MAX_MTU enforcement While working on yet another syzkaller report, I found that our IP_MAX_MTU enforcements were not properly done. gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and final result can be bigger than IP_MAX_MTU :/ This is a problem because device mtu can be changed on other cpus or threads. While this patch does not fix the issue I am working on, it is probably worth addressing it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 821cedcc8e73..0cf7f5a65fe6 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,7 +352,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, !forwarding) return dst_mtu(dst); - return min(dst->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, @@ -364,7 +364,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); } - return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); + return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } u32 ip_idents_reserve(u32 hash, int segs); -- cgit From 70e42fd02c46e2aa9ab07b766d418637e3a51de7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 9 Aug 2017 12:00:22 +0900 Subject: scsi: sd_zbc: Write unlock zone from sd_uninit_cmnd() Releasing a zone write lock only when the write commnand that acquired the lock completes can cause deadlocks due to potential command reordering if the lock owning request is requeued and not executed. This problem exists only with the scsi-mq path as, unlike the legacy path, requests are moved out of the dispatch queue before being prepared and so before locking a zone for a write command. Since sd_uninit_cmnd() is now always called when a request is requeued, call sd_zbc_write_unlock_zone() from that function for write requests that acquired a zone lock instead of from sd_done(). Acquisition of a zone lock by a write command is indicated using the new command flag SCMD_ZONE_WRITE_LOCK. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index a1266d318c85..6af198d8120b 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -57,6 +57,7 @@ struct scsi_pointer { /* for scmd->flags */ #define SCMD_TAGGED (1 << 0) #define SCMD_UNCHECKED_ISA_DMA (1 << 1) +#define SCMD_ZONE_WRITE_LOCK (1 << 2) struct scsi_cmnd { struct scsi_request req; -- cgit From ce0fa3e56ad20f04d8252353dcd24e924abdafca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 16 Aug 2017 10:18:03 -0700 Subject: x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages Speculative processor accesses may reference any memory that has a valid page table entry. While a speculative access won't generate a machine check, it will log the error in a machine check bank. That could cause escalation of a subsequent error since the overflow bit will be then set in the machine check bank status register. Code has to be double-plus-tricky to avoid mentioning the 1:1 virtual address of the page we want to map out otherwise we may trigger the very problem we are trying to avoid. We use a non-canonical address that passes through the usual Linux table walking code to get to the same "pte". Thanks to Dave Hansen for reviewing several iterations of this. Also see: http://marc.info/?l=linux-mm&m=149860136413338&w=2 Signed-off-by: Tony Luck Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Elliott, Robert (Persistent Memory) Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170816171803.28342-1-tony.luck@intel.com Signed-off-by: Ingo Molnar --- include/linux/mm_inline.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index e030a68ead7e..25438b2b6f22 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -126,4 +126,10 @@ static __always_inline enum lru_list page_lru(struct page *page) #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +#ifdef arch_unmap_kpfn +extern void arch_unmap_kpfn(unsigned long pfn); +#else +static __always_inline void arch_unmap_kpfn(unsigned long pfn) { } +#endif + #endif -- cgit From 7a46ec0e2f4850407de5e1d19a44edee6efa58ec Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Aug 2017 09:19:24 -0700 Subject: locking/refcounts, x86/asm: Implement fast refcount overflow protection This implements refcount_t overflow protection on x86 without a noticeable performance impact, though without the fuller checking of REFCOUNT_FULL. This is done by duplicating the existing atomic_t refcount implementation but with normally a single instruction added to detect if the refcount has gone negative (e.g. wrapped past INT_MAX or below zero). When detected, the handler saturates the refcount_t to INT_MIN / 2. With this overflow protection, the erroneous reference release that would follow a wrap back to zero is blocked from happening, avoiding the class of refcount-overflow use-after-free vulnerabilities entirely. Only the overflow case of refcounting can be perfectly protected, since it can be detected and stopped before the reference is freed and left to be abused by an attacker. There isn't a way to block early decrements, and while REFCOUNT_FULL stops increment-from-zero cases (which would be the state _after_ an early decrement and stops potential double-free conditions), this fast implementation does not, since it would require the more expensive cmpxchg loops. Since the overflow case is much more common (e.g. missing a "put" during an error path), this protection provides real-world protection. For example, the two public refcount overflow use-after-free exploits published in 2016 would have been rendered unexploitable: http://perception-point.io/2016/01/14/analysis-and-exploitation-of-a-linux-kernel-vulnerability-cve-2016-0728/ http://cyseclabs.com/page?n=02012016 This implementation does, however, notice an unchecked decrement to zero (i.e. caller used refcount_dec() instead of refcount_dec_and_test() and it resulted in a zero). Decrements under zero are noticed (since they will have resulted in a negative value), though this only indicates that a use-after-free may have already happened. Such notifications are likely avoidable by an attacker that has already exploited a use-after-free vulnerability, but it's better to have them reported than allow such conditions to remain universally silent. On first overflow detection, the refcount value is reset to INT_MIN / 2 (which serves as a saturation value) and a report and stack trace are produced. When operations detect only negative value results (such as changing an already saturated value), saturation still happens but no notification is performed (since the value was already saturated). On the matter of races, since the entire range beyond INT_MAX but before 0 is negative, every operation at INT_MIN / 2 will trap, leaving no overflow-only race condition. As for performance, this implementation adds a single "js" instruction to the regular execution flow of a copy of the standard atomic_t refcount operations. (The non-"and_test" refcount_dec() function, which is uncommon in regular refcount design patterns, has an additional "jz" instruction to detect reaching exactly zero.) Since this is a forward jump, it is by default the non-predicted path, which will be reinforced by dynamic branch prediction. The result is this protection having virtually no measurable change in performance over standard atomic_t operations. The error path, located in .text.unlikely, saves the refcount location and then uses UD0 to fire a refcount exception handler, which resets the refcount, handles reporting, and returns to regular execution. This keeps the changes to .text size minimal, avoiding return jumps and open-coded calls to the error reporting routine. Example assembly comparison: refcount_inc() before: .text: ffffffff81546149: f0 ff 45 f4 lock incl -0xc(%rbp) refcount_inc() after: .text: ffffffff81546149: f0 ff 45 f4 lock incl -0xc(%rbp) ffffffff8154614d: 0f 88 80 d5 17 00 js ffffffff816c36d3 ... .text.unlikely: ffffffff816c36d3: 48 8d 4d f4 lea -0xc(%rbp),%rcx ffffffff816c36d7: 0f ff (bad) These are the cycle counts comparing a loop of refcount_inc() from 1 to INT_MAX and back down to 0 (via refcount_dec_and_test()), between unprotected refcount_t (atomic_t), fully protected REFCOUNT_FULL (refcount_t-full), and this overflow-protected refcount (refcount_t-fast): 2147483646 refcount_inc()s and 2147483647 refcount_dec_and_test()s: cycles protections atomic_t 82249267387 none refcount_t-fast 82211446892 overflow, untested dec-to-zero refcount_t-full 144814735193 overflow, untested dec-to-zero, inc-from-zero This code is a modified version of the x86 PAX_REFCOUNT atomic_t overflow defense from the last public patch of PaX/grsecurity, based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Thanks to PaX Team for various suggestions for improvement for repurposing this code to be a refcount-only protection. Signed-off-by: Kees Cook Reviewed-by: Josh Poimboeuf Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: David S. Miller Cc: Davidlohr Bueso Cc: Elena Reshetova Cc: Eric Biggers Cc: Eric W. Biederman Cc: Greg KH Cc: Hans Liljestrand Cc: James Bottomley Cc: Jann Horn Cc: Linus Torvalds Cc: Manfred Spraul Cc: Peter Zijlstra Cc: Rik van Riel Cc: Serge E. Hallyn Cc: Thomas Gleixner Cc: arozansk@redhat.com Cc: axboe@kernel.dk Cc: kernel-hardening@lists.openwall.com Cc: linux-arch Link: http://lkml.kernel.org/r/20170815161924.GA133115@beast Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 7 +++++++ include/linux/refcount.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bd6d96cf80b1..6607225d0ea4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -277,6 +277,13 @@ extern int oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; +#ifdef CONFIG_ARCH_HAS_REFCOUNT +void refcount_error_report(struct pt_regs *regs, const char *err); +#else +static inline void refcount_error_report(struct pt_regs *regs, const char *err) +{ } +#endif + /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 591792c8e5b0..48b7c9c68c4d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -53,6 +53,9 @@ extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); extern __must_check bool refcount_dec_and_test(refcount_t *r); extern void refcount_dec(refcount_t *r); #else +# ifdef CONFIG_ARCH_HAS_REFCOUNT +# include +# else static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) { return atomic_add_unless(&r->refs, i, 0); @@ -87,6 +90,7 @@ static inline void refcount_dec(refcount_t *r) { atomic_dec(&r->refs); } +# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ #endif /* CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); -- cgit From 02e43c2dcd3b3cf7244f6dda65a07e8dacadaf8d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 16 Aug 2017 21:46:51 +0800 Subject: efi: Introduce efi_early_memdesc_ptr to get pointer to memmap descriptor The existing map iteration helper for_each_efi_memory_desc_in_map can only be used after the kernel initializes the EFI subsystem to set up struct efi_memory_map. Before that we also need iterate map descriptors which are stored in several intermediate structures, like struct efi_boot_memmap for arch independent usage and struct efi_info for x86 arch only. Introduce efi_early_memdesc_ptr() to get pointer to a map descriptor, and replace several places where that primitive is open coded. Signed-off-by: Baoquan He [ Various improvements to the text. ] Acked-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: ard.biesheuvel@linaro.org Cc: fanc.fnst@cn.fujitsu.com Cc: izumi.taku@jp.fujitsu.com Cc: keescook@chromium.org Cc: linux-efi@vger.kernel.org Cc: n-horiguchi@ah.jp.nec.com Cc: thgarnie@google.com Link: http://lkml.kernel.org/r/20170816134651.GF21273@x1 Signed-off-by: Ingo Molnar --- include/linux/efi.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 8269bcb8ccf7..a686ca9a7e5c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1020,6 +1020,28 @@ extern int efi_memattr_init(void); extern int efi_memattr_apply_permissions(struct mm_struct *mm, efi_memattr_perm_setter fn); +/* + * efi_early_memdesc_ptr - get the n-th EFI memmap descriptor + * @map: the start of efi memmap + * @desc_size: the size of space for each EFI memmap descriptor + * @n: the index of efi memmap descriptor + * + * EFI boot service provides the GetMemoryMap() function to get a copy of the + * current memory map which is an array of memory descriptors, each of + * which describes a contiguous block of memory. It also gets the size of the + * map, and the size of each descriptor, etc. + * + * Note that per section 6.2 of UEFI Spec 2.6 Errata A, the returned size of + * each descriptor might not be equal to sizeof(efi_memory_memdesc_t), + * since efi_memory_memdesc_t may be extended in the future. Thus the OS + * MUST use the returned size of the descriptor to find the start of each + * efi_memory_memdesc_t in the memory map array. This should only be used + * during bootup since for_each_efi_memory_desc_xxx() is available after the + * kernel initializes the EFI subsystem to set up struct efi_memory_map. + */ +#define efi_early_memdesc_ptr(map, desc_size, n) \ + (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size))) + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ -- cgit From ea3f2c0fdfbb180f142cdbc0d1f055c7b9a5e63e Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 17 Aug 2017 17:57:41 +0900 Subject: locking/lockdep: Rename CONFIG_LOCKDEP_COMPLETE to CONFIG_LOCKDEP_COMPLETIONS 'complete' is an adjective and LOCKDEP_COMPLETE sounds like 'lockdep is complete', so pick a better name that uses a noun. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/1502960261-16206-3-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/completion.h b/include/linux/completion.h index 9bcebf509b4d..791f053f28b7 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,7 +9,7 @@ */ #include -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS #include #endif @@ -28,12 +28,12 @@ struct completion { unsigned int done; wait_queue_head_t wait; -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS struct lockdep_map_cross map; #endif }; -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS static inline void complete_acquire(struct completion *x) { lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); @@ -64,7 +64,7 @@ static inline void complete_release(struct completion *x) {} static inline void complete_release_commit(struct completion *x) {} #endif -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } -- cgit From 52fa5bc5cbba089f09bc2c372e3432f3f3e48051 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 17 Aug 2017 17:46:12 +0800 Subject: locking/lockdep: Explicitly initialize wq_barrier::done::map With the new lockdep crossrelease feature, which checks completions usage, a false positive is reported in the workqueue code: > Worker A : acquired of wfc.work -> wait for cpu_hotplug_lock to be released > Task B : acquired of cpu_hotplug_lock -> wait for lock#3 to be released > Task C : acquired of lock#3 -> wait for completion of barr->done > (Task C is in lru_add_drain_all_cpuslocked()) > Worker D : wait for wfc.work to be released -> will complete barr->done Such a dead lock can not happen because Task C's barr->done and Worker D's barr->done can not be the same instance. The reason of this false positive is we initialize all wq_barrier::done at insert_wq_barrier() via init_completion(), which makes them belong to the same lock class, therefore, impossible circles are reported. To fix this, explicitly initialize the lockdep map for wq_barrier::done in insert_wq_barrier(), so that the lock class key of wq_barrier::done is a subkey of the corresponding work_struct, as a result we won't build a dependency between a wq_barrier with a unrelated work, and we can differ wq barriers based on the related works, so the false positive above is avoided. Also define the empty lockdep_init_map_crosslock() for !CROSSRELEASE to make the code simple and away from unnecessary #ifdefs. Reported-by: Ingo Molnar Signed-off-by: Boqun Feng Cc: Byungchul Park Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170817094622.12915-1-boqun.feng@gmail.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 651cc61af041..fc827cab6d6e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -583,6 +583,7 @@ extern void crossrelease_hist_end(enum xhlock_context_t c); extern void lockdep_init_task(struct task_struct *task); extern void lockdep_free_task(struct task_struct *task); #else +#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. * Note that _name must not be NULL. -- cgit From 7e42776d5ed1fe9a941ed8876c5d15cd7cf5d89f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 May 2017 08:05:00 -0700 Subject: rcu: Drive TASKS_RCU directly off of PREEMPT The actual use of TASKS_RCU is only when PREEMPT, otherwise RCU-sched is used instead. This commit therefore makes synchronize_rcu_tasks() and call_rcu_tasks() available always, but mapped to synchronize_sched() and call_rcu_sched(), respectively, when !PREEMPT. This approach also allows some #ifdefs to be removed from rcutorture. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Reviewed-by: Masami Hiramatsu Acked-by: Ingo Molnar --- include/linux/rcupdate.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f816fc72b51e..c3f380befdd7 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -58,8 +58,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); void synchronize_sched(void); -void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); -void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); #ifdef CONFIG_PREEMPT_RCU @@ -176,10 +174,14 @@ extern struct srcu_struct tasks_rcu_exit_srcu; rcu_all_qs(); \ rcu_note_voluntary_context_switch_lite(t); \ } while (0) +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); +void synchronize_rcu_tasks(void); #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) #define rcu_note_voluntary_context_switch_lite(t) do { } while (0) #define rcu_note_voluntary_context_switch(t) rcu_all_qs() +#define call_rcu_tasks call_rcu_sched +#define synchronize_rcu_tasks synchronize_sched #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** -- cgit From ccdd29ffffa7246cb359b9408772858a15fc4ea5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 May 2017 08:51:48 -0700 Subject: rcu: Create reasonable API for do_exit() TASKS_RCU processing Currently, the exit-time support for TASKS_RCU is open-coded in do_exit(). This commit creates exit_tasks_rcu_start() and exit_tasks_rcu_finish() APIs for do_exit() use. This has the benefit of confining the use of the tasks_rcu_exit_srcu variable to one file, allowing it to become static. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 7 ++++--- include/linux/sched.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c3f380befdd7..ce9d21923d75 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -162,8 +162,6 @@ static inline void rcu_init_nohz(void) { } * macro rather than an inline function to avoid #include hell. */ #ifdef CONFIG_TASKS_RCU -#define TASKS_RCU(x) x -extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch_lite(t) \ do { \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ @@ -176,12 +174,15 @@ extern struct srcu_struct tasks_rcu_exit_srcu; } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); +void exit_tasks_rcu_start(void); +void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU */ -#define TASKS_RCU(x) do { } while (0) #define rcu_note_voluntary_context_switch_lite(t) do { } while (0) #define rcu_note_voluntary_context_switch(t) rcu_all_qs() #define call_rcu_tasks call_rcu_sched #define synchronize_rcu_tasks synchronize_sched +static inline void exit_tasks_rcu_start(void) { } +static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..e4c38809a09e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -589,9 +589,10 @@ struct task_struct { #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; + u8 rcu_tasks_holdout; + u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; + struct list_head rcu_tasks_holdout_list; #endif /* #ifdef CONFIG_TASKS_RCU */ struct sched_info sched_info; -- cgit From 352eee1242ef62e18a475ef9278697dbd865969b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 20 Jun 2017 14:45:46 -0700 Subject: swait: Add idle variants which don't contribute to load average There are cases where folks are using an interruptible swait when using kthreads. This is rather confusing given you'd expect interruptible waits to be -- interruptible, but kthreads are not interruptible ! The reason for such practice though is to avoid having these kthreads contribute to the system load average. When systems are idle some kthreads may spend a lot of time blocking if using swait_event_timeout(). This would contribute to the system load average. On systems without preemption this would mean the load average of an idle system is bumped to 2 instead of 0. On systems with PREEMPT=y this would mean the load average of an idle system is bumped to 3 instead of 0. This adds proper API using TASK_IDLE to make such goals explicit and avoid confusion. Suggested-by: "Eric W. Biederman" Acked-by: "Eric W. Biederman" Tested-by: Paul E. McKenney Signed-off-by: Luis R. Rodriguez Signed-off-by: Paul E. McKenney --- include/linux/swait.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/swait.h b/include/linux/swait.h index c1f9c62a8a50..4a4e180d0a35 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -169,4 +169,59 @@ do { \ __ret; \ }) +#define __swait_event_idle(wq, condition) \ + (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) + +/** + * swait_event_idle - wait without system load contribution + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_IDLE) until the @condition evaluates to + * true. The @condition is checked each time the waitqueue @wq is woken up. + * + * This function is mostly used when a kthread or workqueue waits for some + * condition and doesn't want to contribute to system load. Signals are + * ignored. + */ +#define swait_event_idle(wq, condition) \ +do { \ + if (condition) \ + break; \ + __swait_event_idle(wq, condition); \ +} while (0) + +#define __swait_event_idle_timeout(wq, condition, timeout) \ + ___swait_event(wq, ___wait_cond_timeout(condition), \ + TASK_IDLE, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * swait_event_idle_timeout - wait up to timeout without load contribution + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout at which we'll give up in jiffies + * + * The process is put to sleep (TASK_IDLE) until the @condition evaluates to + * true. The @condition is checked each time the waitqueue @wq is woken up. + * + * This function is mostly used when a kthread or workqueue waits for some + * condition and doesn't want to contribute to system load. Signals are + * ignored. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. + */ +#define swait_event_idle_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __swait_event_idle_timeout(wq, \ + condition, timeout); \ + __ret; \ +}) + #endif /* _LINUX_SWAIT_H */ -- cgit From 22e4ebb975822833b083533035233d128b30e98f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 28 Jul 2017 16:40:40 -0400 Subject: membarrier: Provide expedited private command Implement MEMBARRIER_CMD_PRIVATE_EXPEDITED with IPIs using cpumask built from all runqueues for which current thread's mm is the same as the thread calling sys_membarrier. It executes faster than the non-expedited variant (no blocking). It also works on NOHZ_FULL configurations. Scheduler-wise, it requires a memory barrier before and after context switching between processes (which have different mm). The memory barrier before context switch is already present. For the barrier after context switch: * Our TSO archs can do RELEASE without being a full barrier. Look at x86 spin_unlock() being a regular STORE for example. But for those archs, all atomics imply smp_mb and all of them have atomic ops in switch_mm() for mm_cpumask(), and on x86 the CR3 load acts as a full barrier. * From all weakly ordered machines, only ARM64 and PPC can do RELEASE, the rest does indeed do smp_mb(), so there the spin_unlock() is a full barrier and we're good. * ARM64 has a very heavy barrier in switch_to(), which suffices. * PPC just removed its barrier from switch_to(), but appears to be talking about adding something to switch_mm(). So add a smp_mb__after_unlock_lock() for now, until this is settled on the PPC side. Changes since v3: - Properly document the memory barriers provided by each architecture. Changes since v2: - Address comments from Peter Zijlstra, - Add smp_mb__after_unlock_lock() after finish_lock_switch() in finish_task_switch() to add the memory barrier we need after storing to rq->curr. This is much simpler than the previous approach relying on atomic_dec_and_test() in mmdrop(), which actually added a memory barrier in the common case of switching between userspace processes. - Return -EINVAL when MEMBARRIER_CMD_SHARED is used on a nohz_full kernel, rather than having the whole membarrier system call returning -ENOSYS. Indeed, CMD_PRIVATE_EXPEDITED is compatible with nohz_full. Adapt the CMD_QUERY mask accordingly. Changes since v1: - move membarrier code under kernel/sched/ because it uses the scheduler runqueue, - only add the barrier when we switch from a kernel thread. The case where we switch from a user-space thread is already handled by the atomic_dec_and_test() in mmdrop(). - add a comment to mmdrop() documenting the requirement on the implicit memory barrier. CC: Peter Zijlstra CC: Paul E. McKenney CC: Boqun Feng CC: Andrew Hunter CC: Maged Michael CC: gromer@google.com CC: Avi Kivity CC: Benjamin Herrenschmidt CC: Paul Mackerras CC: Michael Ellerman Signed-off-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Tested-by: Dave Watson --- include/uapi/linux/membarrier.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index e0b108bd2624..6d47b3249d8a 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -40,14 +40,33 @@ * (non-running threads are de facto in such a * state). This covers threads from all processes * running on the system. This command returns 0. + * @MEMBARRIER_CMD_PRIVATE_EXPEDITED: + * Execute a memory barrier on each running + * thread belonging to the same process as the current + * thread. Upon return from system call, the + * caller thread is ensured that all its running + * threads siblings have passed through a state + * where all memory accesses to user-space + * addresses match program order between entry + * to and return from the system call + * (non-running threads are de facto in such a + * state). This only covers threads from the + * same processes as the caller thread. This + * command returns 0. The "expedited" commands + * complete faster than the non-expedited ones, + * they never block, but have the downside of + * causing extra overhead. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. */ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), + /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ + /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ -- cgit From 27a0342ac162bf2ba30c288cfb7b72eabed38d8b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 17 Aug 2017 16:42:17 +0200 Subject: soc/tegra: Register SoC device Move this code from arch/arm/mach-tegra and make it common among 32-bit and 64-bit Tegra SoCs. This is slightly complicated by the fact that on 32-bit Tegra, the SoC device is used as the parent for all devices that are instantiated from device tree. Signed-off-by: Thierry Reding --- include/soc/tegra/fuse.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/tegra/fuse.h b/include/soc/tegra/fuse.h index b4c9219e7f95..9b6ea0c72117 100644 --- a/include/soc/tegra/fuse.h +++ b/include/soc/tegra/fuse.h @@ -65,6 +65,8 @@ int tegra_fuse_readl(unsigned long offset, u32 *value); extern struct tegra_sku_info tegra_sku_info; +struct device *tegra_soc_device_register(void); + #endif /* __ASSEMBLY__ */ #endif /* __SOC_TEGRA_FUSE_H__ */ -- cgit From d3a024abbc438277851c510b51ec9b158821488b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 29 Jun 2017 15:47:44 -0700 Subject: locking: Remove spin_unlock_wait() generic definitions There is no agreed-upon definition of spin_unlock_wait()'s semantics, and it appears that all callers could do just as well with a lock/unlock pair. This commit therefore removes spin_unlock_wait() and related definitions from core code. Signed-off-by: Paul E. McKenney Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Will Deacon Cc: Peter Zijlstra Cc: Alan Stern Cc: Andrea Parri Cc: Linus Torvalds --- include/asm-generic/qspinlock.h | 14 -------------- include/linux/spinlock.h | 11 ----------- include/linux/spinlock_up.h | 6 ------ 3 files changed, 31 deletions(-) (limited to 'include') diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 9f0681bf1e87..66260777d644 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -21,17 +21,6 @@ #include -/** - * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock - * @lock : Pointer to queued spinlock structure - * - * There is a very slight possibility of live-lock if the lockers keep coming - * and the waiter is just unfortunate enough to not see any unlock state. - */ -#ifndef queued_spin_unlock_wait -extern void queued_spin_unlock_wait(struct qspinlock *lock); -#endif - /** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure @@ -41,8 +30,6 @@ extern void queued_spin_unlock_wait(struct qspinlock *lock); static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* - * See queued_spin_unlock_wait(). - * * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL * isn't immediately observable. */ @@ -135,6 +122,5 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) #define arch_spin_lock_flags(l, f) queued_spin_lock(l) -#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l) #endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 59248dcc6ef3..ef018a6e4985 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,12 +130,6 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif -/** - * raw_spin_unlock_wait - wait until the spinlock gets unlocked - * @lock: the spinlock in question. - */ -#define raw_spin_unlock_wait(lock) arch_spin_unlock_wait(&(lock)->raw_lock) - #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock) @@ -369,11 +363,6 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) -static __always_inline void spin_unlock_wait(spinlock_t *lock) -{ - raw_spin_unlock_wait(&lock->rlock); -} - static __always_inline int spin_is_locked(spinlock_t *lock) { return raw_spin_is_locked(&lock->rlock); diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 0d9848de677d..612fb530af41 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -26,11 +26,6 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define arch_spin_is_locked(x) ((x)->slock == 0) -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_cond_load_acquire(&lock->slock, VAL); -} - static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; @@ -73,7 +68,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) -#define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) -- cgit From c8c03f1858331e85d397bacccd34ef409aae993c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 16 Aug 2017 17:08:07 -0700 Subject: pty: fix the cached path of the pty slave file descriptor in the master Christian Brauner reported that if you use the TIOCGPTPEER ioctl() to get a slave pty file descriptor, the resulting file descriptor doesn't look right in /proc//fd/. In particular, he wanted to use readlink() on /proc/self/fd/ to get the pathname of the slave pty (basically implementing "ptsname{_r}()"). The reason for that was that we had generated the wrong 'struct path' when we create the pty in ptmx_open(). In particular, the dentry was correct, but the vfsmount pointed to the mount of the ptmx node. That _can_ be correct - in case you use "/dev/pts/ptmx" to open the master - but usually is not. The normal case is to use /dev/ptmx, which then looks up the pts/ directory, and then the vfsmount of the ptmx node is obviously the /dev directory, not the /dev/pts/ directory. We actually did have the right vfsmount available, but in the wrong place (it gets looked up in 'devpts_acquire()' when we get a reference to the pts filesystem), and so ptmx_open() used the wrong mnt pointer. The end result of this confusion was that the pty worked fine, but when if you did TIOCGPTPEER to get the slave side of the pty, end end result would also work, but have that dodgy 'struct path'. And then when doing "d_path()" on to get the pathname, the vfsmount would not match the root of the pts directory, and d_path() would return an empty pathname thinking that the entry had escaped a bind mount into another mount. This fixes the problem by making devpts_acquire() return the vfsmount for the pts filesystem, allowing ptmx_open() to trivially just use the right mount for the pts dentry, and create the proper 'struct path'. Reported-by: Christian Brauner Cc: Al Viro Acked-by: Eric Biederman Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..7883e901f65c 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *); +struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit From bc8230ee8e2ba967af780cdaf2dcc0f8e5eb45ca Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Jun 2017 14:39:48 +0200 Subject: quota: Convert dqio_mutex to rwsem Convert dqio_mutex to rwsem and call it dqio_sem. No functional changes yet. Signed-off-by: Jan Kara --- include/linux/quota.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index bfd077ca6ac3..3a6df7461642 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -521,7 +521,7 @@ static inline void quota_send_warning(struct kqid qid, dev_t dev, struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ - struct mutex dqio_mutex; /* lock device while I/O in progress */ + struct rw_semaphore dqio_sem; /* Lock quota file while I/O in progress */ struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ -- cgit From 1312b7e0caca44e7ff312bc2eaa888943384e3e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 10 Aug 2017 00:31:58 +0200 Subject: ACPICA: Make it possible to enable runtime GPEs earlier Runtime GPEs have corresponding _Lxx/_Exx methods and are enabled automatically during the initialization of the ACPI subsystem through acpi_update_all_gpes() with the assumption that acpi_setup_gpe_for_wake() will be called in advance for all of the GPEs pointed to by _PRW objects in the namespace that may be affected by acpi_update_all_gpes(). That is, acpi_ev_initialize_gpe_block() can only be called for a GPE block after acpi_setup_gpe_for_wake() has been called for all of the _PRW (wakeup) GPEs in it. The platform firmware on some systems, however, expects GPEs to be enabled before the enumeration of devices which is when acpi_setup_gpe_for_wake() is called and that goes against the above assumption. For this reason, introduce a new flag to be set by acpi_ev_initialize_gpe_block() when automatically enabling a GPE to indicate to acpi_setup_gpe_for_wake() that it needs to drop the reference to the GPE coming from acpi_ev_initialize_gpe_block() and modify acpi_setup_gpe_for_wake() accordingly. These changes allow acpi_setup_gpe_for_wake() and acpi_ev_initialize_gpe_block() to be invoked in any order. Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- include/acpi/actypes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 2fcbaec8b368..71eddf645566 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -775,7 +775,7 @@ typedef u32 acpi_event_status; * | | | | +-- Type of dispatch:to method, handler, notify, or none * | | | +----- Interrupt type: edge or level triggered * | | +------- Is a Wake GPE - * | +--------- Is GPE masked by the software GPE masking mechanism + * | +--------- Has been enabled automatically at init time * +------------ */ #define ACPI_GPE_DISPATCH_NONE (u8) 0x00 @@ -791,6 +791,7 @@ typedef u32 acpi_event_status; #define ACPI_GPE_XRUPT_TYPE_MASK (u8) 0x08 #define ACPI_GPE_CAN_WAKE (u8) 0x10 +#define ACPI_GPE_AUTO_ENABLED (u8) 0x20 /* * Flags for GPE and Lock interfaces -- cgit From 5d72801538eb59cfd9ca25d00aa439cfbc02ac9a Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 17 Aug 2017 13:32:37 -0400 Subject: lsm_audit: update my email address Update my email address since epoch.ncsc.mil no longer exists. MAINTAINERS and CREDITS are already correct. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 22b5d4e687ce..d1c2901f1542 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -4,7 +4,7 @@ * * Author : Etienne BASSET * - * All credits to : Stephen Smalley, + * All credits to : Stephen Smalley, * All BUGS to : Etienne BASSET */ #ifndef _LSM_COMMON_LOGGING_ -- cgit From 373533f80b89f0f4fb59b65c2942d1b20b91319c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 7 Aug 2017 11:13:41 +0200 Subject: drm/ttm: make ttm_mem_type_manager_func debug more useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide the drm printer directly instead of just the callback. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_bo_driver.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d30850e07936..5f821a9b3a1f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -229,13 +229,14 @@ struct ttm_mem_type_manager_func { * struct ttm_mem_type_manager member debug * * @man: Pointer to a memory type manager. - * @prefix: Prefix to be used in printout to identify the caller. + * @printer: Prefix to be used in printout to identify the caller. * * This function is called to print out the state of the memory * type manager to aid debugging of out-of-memory conditions. * It may not be called from within atomic context. */ - void (*debug)(struct ttm_mem_type_manager *man, const char *prefix); + void (*debug)(struct ttm_mem_type_manager *man, + struct drm_printer *printer); }; /** -- cgit From 503330f3820fab13aa2a7b1f9e7633686acc7c79 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 2 Aug 2017 17:18:50 +0200 Subject: quota: Remove dq_wait_unused from dquot Currently every dquot carries a wait_queue_head_t used only when we are turning quotas off to wait for last users to drop dquot references. Since such rare case is not performance sensitive in any means, just use a global waitqueue for this and save space in struct dquot. Also convert the logic to use wait_event() instead of open-coding it. Signed-off-by: Jan Kara --- include/linux/quota.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 3a6df7461642..ad6809f099ac 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -299,7 +299,6 @@ struct dquot { struct list_head dq_dirty; /* List of dirty dquots */ struct mutex dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ - wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ struct super_block *dq_sb; /* superblock this applies to */ struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ loff_t dq_off; /* Offset of dquot on disk */ -- cgit From 834057bf846691552a8906f7ed3f67546e5f897c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 Aug 2017 11:18:23 +0200 Subject: quota: Allow disabling tracking of dirty dquots in a list Filesystems that are journalling quotas generally don't need tracking of dirty dquots in a list since forcing a transaction commit flushes all quotas anyway. Allow filesystem to say it doesn't want dquots to be tracked as it reduces contention on the dq_list_lock. Signed-off-by: Jan Kara --- include/linux/quota.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index ad6809f099ac..eccc1cb6274e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -490,6 +490,9 @@ enum { */ #define DQUOT_NEGATIVE_USAGE (1 << (DQUOT_STATE_LAST + 1)) /* Allow negative quota usage */ +/* Do not track dirty dquots in a list */ +#define DQUOT_NOLIST_DIRTY (1 << (DQUOT_STATE_LAST + 2)) + static inline unsigned int dquot_state_flag(unsigned int flags, int type) { return flags << type; -- cgit From 0ed60de34a975804a256fb3fe233b1466c603be6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Aug 2017 17:07:28 +0200 Subject: quota: Inline functions into their callsites inode_add_rsv_space() and inode_sub_rsv_space() had only one callsite. Inline them there directly. inode_claim_rsv_space() and inode_reclaim_rsv_space() had two callsites so inline them there as well. This will simplify further locking changes. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index dda22f45fc1b..0ce6fc49962e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -38,11 +38,6 @@ void __quota_error(struct super_block *sb, const char *func, /* * declaration of quota_function calls in kernel. */ -void inode_add_rsv_space(struct inode *inode, qsize_t number); -void inode_claim_rsv_space(struct inode *inode, qsize_t number); -void inode_sub_rsv_space(struct inode *inode, qsize_t number); -void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); - int dquot_initialize(struct inode *inode); bool dquot_initialize_needed(struct inode *inode); void dquot_drop(struct inode *inode); -- cgit From f4a8116a4c8c8f754d0ec1498a2ba4b63d114e6a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 8 Aug 2017 09:54:36 +0200 Subject: fs: Provide __inode_get_bytes() Provide helper __inode_get_bytes() which assumes i_lock is already acquired. Quota code will need this to be able to use i_lock to protect consistency of quota accounting information and inode usage. Signed-off-by: Jan Kara --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..d6e9ab7f184f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2998,6 +2998,10 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); +static inline loff_t __inode_get_bytes(struct inode *inode) +{ + return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; +} loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); const char *simple_get_link(struct dentry *, struct inode *, -- cgit From 7b9ca4c61bc278b771fb57d6290a31ab1fc7fdac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 7 Aug 2017 13:19:50 +0200 Subject: quota: Reduce contention on dq_data_lock dq_data_lock is currently used to protect all modifications of quota accounting information, consistency of quota accounting on the inode, and dquot pointers from inode. As a result contention on the lock can be pretty heavy. Reduce the contention on the lock by protecting quota accounting information by a new dquot->dq_dqb_lock and consistency of quota accounting with inode usage by inode->i_lock. This change reduces time to create 500000 files on ext4 on ramdisk by 50 different processes in separate directories by 6% when user quota is turned on. When those 50 processes belong to 50 different users, the improvement is about 9%. Signed-off-by: Jan Kara --- include/linux/quota.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index eccc1cb6274e..074123975595 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -298,12 +298,13 @@ struct dquot { struct list_head dq_free; /* Free list element */ struct list_head dq_dirty; /* List of dirty dquots */ struct mutex dq_lock; /* dquot IO lock */ + spinlock_t dq_dqb_lock; /* Lock protecting dq_dqb changes */ atomic_t dq_count; /* Use count */ struct super_block *dq_sb; /* superblock this applies to */ struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ loff_t dq_off; /* Offset of dquot on disk */ unsigned long dq_flags; /* See DQ_* */ - struct mem_dqblk dq_dqb; /* Diskquota usage */ + struct mem_dqblk dq_dqb; /* Diskquota usage [dq_dqb_lock] */ }; /* Operations which must be implemented by each quota format */ -- cgit From c71b02e4d207cbcf097f9746d5f7967b22905e70 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Aug 2017 21:11:00 -0700 Subject: Revert "pstore: Honor dmesg_restrict sysctl on dmesg dumps" This reverts commit 68c4a4f8abc60c9440ede9cd123d48b78325f7a3, with various conflict clean-ups. The capability check required too much privilege compared to simple DAC controls. A system builder was forced to have crash handler processes run with CAP_SYSLOG which would give it the ability to read (and wipe) the _current_ dmesg, which is much more access than being given access only to the historical log stored in pstorefs. With the prior commit to make the root directory 0750, the files are protected by default but a system builder can now opt to give access to a specific group (via chgrp on the pstorefs root directory) without being forced to also give away CAP_SYSLOG. Suggested-by: Nick Kralevich Signed-off-by: Kees Cook Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky --- include/linux/syslog.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/syslog.h b/include/linux/syslog.h index c3a7f0cc3a27..e1c3632f4e81 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -49,13 +49,4 @@ int do_syslog(int type, char __user *buf, int count, int source); -#ifdef CONFIG_PRINTK -int check_syslog_permissions(int type, int source); -#else -static inline int check_syslog_permissions(int type, int source) -{ - return 0; -} -#endif - #endif /* _LINUX_SYSLOG_H */ -- cgit From 726fb6b4f2a82a14a906f39bdabac4863b87c01a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 15 Aug 2017 18:16:59 -0700 Subject: ACPI / PM: Check low power idle constraints for debug only For SoC to achieve its lowest power platform idle state a set of hardware preconditions must be met. These preconditions or constraints can be obtained by issuing a device specific method (_DSM) with function "1". Refer to the document provided in the link below. Here during initialization (from attach() callback of LPS0 device), invoke function 1 to get the device constraints. Each enabled constraint is stored in a table. The devices in this table are used to check whether they were in required minimum state, while entering suspend. This check is done from platform freeze wake() callback, only when /sys/power/pm_debug_messages attribute is non zero. If any constraint is not met and device is ACPI power managed then it prints the device information to kernel logs. Also if debug is enabled in acpi/sleep.c, the constraint table and state of each device on wake is dumped in kernel logs. Since pm_debug_messages_on setting is used as condition to check constraints outside kernel/power/main.c, pm_debug_messages_on is changed to a global variable. Link: http://www.uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0d41daf7e89d..d10b7980799d 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -495,9 +495,11 @@ static inline void unlock_system_sleep(void) {} #ifdef CONFIG_PM_SLEEP_DEBUG extern bool pm_print_times_enabled; +extern bool pm_debug_messages_on; extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...); #else #define pm_print_times_enabled (false) +#define pm_debug_messages_on (false) #include -- cgit From 74def747bcd09692bdbf8c6a15350795b0f11ca8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Aug 2017 09:39:14 +0100 Subject: genirq: Restrict effective affinity to interrupts actually using it Just because CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK is selected doesn't mean that all the interrupts are using the effective affinity mask. For a number of them, this mask is likely to be empty. In order to deal with this, let's restrict the use of the effective affinity mask to these interrupts that have a non empty effective affinity. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Andrew Lunn Cc: James Hogan Cc: Jason Cooper Cc: Paul Burton Cc: Chris Zankel Cc: Kevin Cernekee Cc: Wei Xu Cc: Max Filippov Cc: Florian Fainelli Cc: Gregory Clement Cc: Matt Redfearn Cc: Sebastian Hesselbarth Link: http://lkml.kernel.org/r/20170818083925.10108-2-marc.zyngier@arm.com --- include/linux/irq.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index d2d543794093..dcfac6c8ba18 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -781,7 +781,10 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) static inline struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { - return d->common->effective_affinity; + if (!cpumask_empty(d->common->effective_affinity)) + return d->common->effective_affinity; + + return d->common->affinity; } static inline void irq_data_update_effective_affinity(struct irq_data *d, const struct cpumask *m) -- cgit From 7703b08cc93b3586f9eb733f3a2b10bed634a5cf Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 17 Aug 2017 17:53:31 -0700 Subject: genirq: Add handle_fasteoi_{level,edge}_irq flow handlers Follow-on patch for gpio-thunderx uses a irqdomain hierarchy which requires slightly different flow handlers, add them to chip.c which contains most of the other flow handlers. Make these conditionally compiled based on CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS. Signed-off-by: David Daney Signed-off-by: Thomas Gleixner Cc: Mark Rutland Cc: Alexandre Courbot Cc: Marc Zyngier Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Link: http://lkml.kernel.org/r/1503017616-3252-3-git-send-email-david.daney@cavium.com --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index d2d543794093..d4728bf6a537 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -568,6 +568,8 @@ extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); extern int irq_chip_pm_get(struct irq_data *data); extern int irq_chip_pm_put(struct irq_data *data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +extern void handle_fasteoi_ack_irq(struct irq_desc *desc); +extern void handle_fasteoi_mask_irq(struct irq_desc *desc); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); -- cgit From 495c38d3001fd226cf91df1d031320f349bcaf35 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 17 Aug 2017 17:53:34 -0700 Subject: irqdomain: Add irq_domain_{push,pop}_irq() functions For an already existing irqdomain hierarchy, as might be obtained via a call to pci_enable_msix_range(), a PCI driver wishing to add an additional irqdomain to the hierarchy needs to be able to insert the irqdomain to that already initialized hierarchy. Calling irq_domain_create_hierarchy() allows the new irqdomain to be created, but no existing code allows for initializing the associated irq_data. Add a couple of helper functions (irq_domain_push_irq() and irq_domain_pop_irq()) to initialize the irq_data for the new irqdomain added to an existing hierarchy. Signed-off-by: David Daney Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: Mark Rutland Cc: Alexandre Courbot Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Link: http://lkml.kernel.org/r/1503017616-3252-6-git-send-email-david.daney@cavium.com --- include/linux/irqdomain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index cac77a5c5555..2318f29054af 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -460,6 +460,9 @@ extern void irq_domain_free_irqs_common(struct irq_domain *domain, extern void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); +extern int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); +extern int irq_domain_pop_irq(struct irq_domain *domain, int virq); + extern int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); -- cgit From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 09:50:13 +0200 Subject: kernel/watchdog: Prevent false positives with turbo modes The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumess a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos --- include/linux/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 8aa01fd859fb..a36abe2da13e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif + +#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ + defined(CONFIG_HARDLOCKUP_DETECTOR) +void watchdog_update_hrtimer_threshold(u64 period); +#else +static inline void watchdog_update_hrtimer_threshold(u64 period) { } +#endif + extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , -- cgit From b94417eaa5f5a20d58a99328a401c0b5a812ec7d Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sun, 13 Aug 2017 11:58:30 +0800 Subject: btrfs: use BTRFS_FSID_SIZE for fsid We have define for FSID size so use it. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 1e4908dcd065..dc1d0df91e0b 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -74,11 +74,11 @@ struct prelim_ref; { BTRFS_BLOCK_GROUP_RAID5, "RAID5"}, \ { BTRFS_BLOCK_GROUP_RAID6, "RAID6"} -#define BTRFS_UUID_SIZE 16 -#define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_UUID_SIZE) +#define BTRFS_FSID_SIZE 16 +#define TP_STRUCT__entry_fsid __array(u8, fsid, BTRFS_FSID_SIZE) #define TP_fast_assign_fsid(fs_info) \ - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE) + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE) #define TP_STRUCT__entry_btrfs(args...) \ TP_STRUCT__entry( \ @@ -618,7 +618,7 @@ TRACE_EVENT(btrfs_add_block_group, TP_ARGS(fs_info, block_group, create), TP_STRUCT__entry( - __array( u8, fsid, BTRFS_UUID_SIZE ) + __array( u8, fsid, BTRFS_FSID_SIZE ) __field( u64, offset ) __field( u64, size ) __field( u64, flags ) @@ -628,7 +628,7 @@ TRACE_EVENT(btrfs_add_block_group, ), TP_fast_assign( - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE); __entry->offset = block_group->key.objectid; __entry->size = block_group->key.offset; __entry->flags = block_group->flags; @@ -975,7 +975,7 @@ TRACE_EVENT(btrfs_trigger_flush, TP_ARGS(fs_info, flags, bytes, flush, reason), TP_STRUCT__entry( - __array( u8, fsid, BTRFS_UUID_SIZE ) + __array( u8, fsid, BTRFS_FSID_SIZE ) __field( u64, flags ) __field( u64, bytes ) __field( int, flush ) @@ -983,7 +983,7 @@ TRACE_EVENT(btrfs_trigger_flush, ), TP_fast_assign( - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE); __entry->flags = flags; __entry->bytes = bytes; __entry->flush = flush; @@ -1016,7 +1016,7 @@ TRACE_EVENT(btrfs_flush_space, TP_ARGS(fs_info, flags, num_bytes, state, ret), TP_STRUCT__entry( - __array( u8, fsid, BTRFS_UUID_SIZE ) + __array( u8, fsid, BTRFS_FSID_SIZE ) __field( u64, flags ) __field( u64, num_bytes ) __field( int, state ) @@ -1024,7 +1024,7 @@ TRACE_EVENT(btrfs_flush_space, ), TP_fast_assign( - memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); + memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE); __entry->flags = flags; __entry->num_bytes = num_bytes; __entry->state = state; -- cgit From d352ae205d8b05f3f7558d10f474d8436581b3e2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Aug 2017 16:23:03 -0700 Subject: blk-mq: Make blk_mq_reinit_tagset() calls easier to read Since blk_mq_ops.reinit_request is only called from inside blk_mq_reinit_tagset(), make this function pointer an argument of blk_mq_reinit_tagset() instead of a member of struct blk_mq_ops. This patch does not change any functionality but makes blk_mq_reinit_tagset() calls easier to read and to analyze. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: James Smart Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 14542308d25b..50c6485cb04f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -97,7 +97,6 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); -typedef int (reinit_request_fn)(void *, struct request *); typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); @@ -143,7 +142,6 @@ struct blk_mq_ops { */ init_request_fn *init_request; exit_request_fn *exit_request; - reinit_request_fn *reinit_request; /* Called from inside blk_get_request() */ void (*initialize_rq_fn)(struct request *rq); @@ -261,7 +259,8 @@ void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); +int blk_mq_reinit_tagset(struct blk_mq_tag_set *set, + int (reinit_request)(void *, struct request *)); int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -- cgit From e1cba4b85daa71b710384d451ff6238d5e4d1ff6 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 17 Aug 2017 15:33:09 -0400 Subject: cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup A new mount option "cpuset_v2_mode" is added to the v1 cgroupfs filesystem to enable cpuset controller to use v2 behavior in a v1 cgroup. This mount option applies only to cpuset controller and have no effect on other controllers. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 59e4ad9e7bac..ade4a78a54c2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -74,6 +74,11 @@ enum { * aren't writeable from inside the namespace. */ CGRP_ROOT_NS_DELEGATE = (1 << 3), + + /* + * Enable cpuset controller in v1 cgroup to use v2 behavior. + */ + CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), }; /* cftype->flags */ -- cgit From f01f969e25ecc6f88844bcdd63f1f27610418bbc Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 18 Aug 2017 20:27:54 +0530 Subject: PCI: endpoint: Add an API to get matching "pci_epf_device_id" Add an API to get "pci_epf_device_id" matching the EPF name. This can be used by the EPF driver to get the driver data corresponding to the EPF device name. Signed-off-by: Kishon Vijay Abraham I [bhelgaas: folded in "while" loop termination fix from Colin Ian King ] Signed-off-by: Bjorn Helgaas --- include/linux/pci-epf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 0d529cb90143..6ed63b5e106b 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -149,6 +149,8 @@ static inline void *epf_get_drvdata(struct pci_epf *epf) return dev_get_drvdata(&epf->dev); } +const struct pci_epf_device_id * +pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf); struct pci_epf *pci_epf_create(const char *name); void pci_epf_destroy(struct pci_epf *epf); int __pci_epf_register_driver(struct pci_epf_driver *driver, -- cgit From 92d50fc1602ecef44babe411c475344e55e1cdd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 15:01:06 +0200 Subject: PCI/IB: add support for pci driver attribute groups Some drivers (specifically the nes IB driver), want to create a lot of sysfs driver attributes. Instead of open-coding the creation and removal of these files (and getting it wrong btw), it's a better idea to let the driver core handle all of this logic for us. So add a new field to the pci driver structure, **groups, that allows pci drivers to specify an attribute group list it wishes to have created when it is registered with the driver core. Big bonus is now the driver doesn't race with userspace when the sysfs files are created vs. when the kobject is announced, so any script/tool that actually wanted to use these files will not have to poll waiting for them to show up. Cc: Faisal Latif Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Signed-off-by: Doug Ledford --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..c421af60f2df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -729,6 +729,7 @@ struct pci_driver { void (*shutdown) (struct pci_dev *dev); int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ const struct pci_error_handlers *err_handler; + const struct attribute_group **groups; struct device_driver driver; struct pci_dynids dynids; }; -- cgit From 5f8a4db715ca21c3195b5eea24e26574c0f5acfa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 31 Jul 2017 08:50:05 +0200 Subject: infiniband: avoid overflow warning A sockaddr_in structure on the stack getting passed into rdma_ip2gid triggers this warning, since we memcpy into a larger sockaddr_in6 structure: In function 'memcpy', inlined from 'rdma_ip2gid' at include/rdma/ib_addr.h:175:3, inlined from 'addr_event.isra.4.constprop' at drivers/infiniband/core/roce_gid_mgmt.c:693:2, inlined from 'inetaddr_event' at drivers/infiniband/core/roce_gid_mgmt.c:716:9: include/linux/string.h:305:4: error: call to '__read_overflow2' declared with attribute error: detected read beyond size of object passed as 2nd parameter The warning seems appropriate here, but the code is also clearly correct, so we really just want to shut up this instance of the output. The best way I found so far is to avoid the memcpy() call and instead replace it with a struct assignment. Fixes: 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions") Cc: Daniel Micay Cc: Kees Cook Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b73a14edc85e..454e6ea742a5 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -172,7 +172,8 @@ static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) (struct in6_addr *)gid); break; case AF_INET6: - memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, 16); + *(struct in6_addr *)&gid->raw = + ((struct sockaddr_in6 *)addr)->sin6_addr; break; default: return -EINVAL; -- cgit From ce7c252a8c741aba7c38f817b86e34361f561e42 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Aug 2017 15:30:35 -0400 Subject: SUNRPC: Add a separate spinlock to protect the RPC request receive list This further reduces contention with the transport_lock, and allows us to convert to using a non-bh-safe spinlock, since the list is now never accessed from a bh context. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 65b9e0224753..a97e6de5f9f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -232,6 +232,7 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ + spinlock_t recv_lock; /* lock receive list */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ -- cgit From 62ede7779904bc75bdd84f1ff0016113956ce3b4 Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Mon, 14 Aug 2017 14:17:43 -0400 Subject: Add OPA extended LID support This patch series primarily increases sizes of variables that hold lid values from 16 to 32 bits. Additionally, it adds a check in the IB mad stack to verify a properly formatted MAD when OPA extended LIDs are used. Signed-off-by: Don Hiatt Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4db4ad56ace6..70a183179224 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3724,16 +3724,30 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_IB; } -/* Return slid in 16bit CPU encoding */ -static inline u16 ib_slid_cpu16(u32 slid) +/** + * ib_lid_cpu16 - Return lid in 16bit CPU encoding. + * In the current implementation the only way to get + * get the 32bit lid is from other sources for OPA. + * For IB, lids will always be 16bits so cast the + * value accordingly. + * + * @lid: A 32bit LID + */ +static inline u16 ib_lid_cpu16(u32 lid) { - return (u16)slid; + WARN_ON_ONCE(lid & 0xFFFF0000); + return (u16)lid; } -/* Return slid in 16bit BE encoding */ -static inline u16 ib_slid_be16(u32 slid) +/** + * ib_lid_be16 - Return lid in 16bit BE encoding. + * + * @lid: A 32bit LID + */ +static inline __be16 ib_lid_be16(u32 lid) { - return cpu_to_be16((u16)slid); + WARN_ON_ONCE(lid & 0xFFFF0000); + return cpu_to_be16((u16)lid); } /** -- cgit From a0917e0bc6efc05834c0c1eafebd579a9c75e6e9 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 18 Aug 2017 15:04:54 -0400 Subject: datagram: When peeking datagrams with offset < 0 don't skip empty skbs Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove headers from UDP packets before queueing"), when udp packets are being peeked the requested extra offset is always 0 as there is no need to skip the udp header. However, when the offset is 0 and the next skb is of length 0, it is only returned once. The behaviour can be seen with the following python script: from socket import *; f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0); f.bind(('::', 0)); addr=('::1', f.getsockname()[1]); g.sendto(b'', addr) g.sendto(b'b', addr) print(f.recvfrom(10, MSG_PEEK)); print(f.recvfrom(10, MSG_PEEK)); Where the expected output should be the empty string twice. Instead, make sk_peek_offset return negative values, and pass those values to __skb_try_recv_datagram/__skb_try_recv_from_queue. If the passed offset to __skb_try_recv_from_queue is negative, the checked skb is never skipped. __skb_try_recv_from_queue will then ensure the offset is reset back to 0 if a peek is requested without an offset, unless no packets are found. Also simplify the if condition in __skb_try_recv_from_queue. If _off is greater then 0, and off is greater then or equal to skb->len, then (_off || skb->len) must always be true assuming skb->len >= 0 is always true. Also remove a redundant check around a call to sk_peek_offset in af_unix.c, as it double checked if MSG_PEEK was set in the flags. V2: - Moved the negative fixup into __skb_try_recv_from_queue, and remove now redundant checks - Fix peeking in udp{,v6}_recvmsg to report the right value when the offset is 0 V3: - Marked new branch in __skb_try_recv_from_queue as unlikely. Signed-off-by: Matthew Dawson Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 7c0632c7e870..aeeec62992ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -507,9 +507,7 @@ int sk_set_peek_off(struct sock *sk, int val); static inline int sk_peek_offset(struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { - s32 off = READ_ONCE(sk->sk_peek_off); - if (off >= 0) - return off; + return READ_ONCE(sk->sk_peek_off); } return 0; -- cgit From 9620fef27ed2cdb37bf6fd028f32bea2ef5119a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Aug 2017 12:08:07 -0700 Subject: ipv4: convert dst_metrics.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index f73611ec4017..93568bd0a352 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -107,7 +108,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; - atomic_t refcnt; + refcount_t refcnt; }; extern const struct dst_metrics dst_default_metrics; -- cgit From 739f79fc9db1b38f96b5a5109b247a650fbebf6d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 18 Aug 2017 15:15:48 -0700 Subject: mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback() Jaegeuk and Brad report a NULL pointer crash when writeback ending tries to update the memcg stats: BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0 IP: test_clear_page_writeback+0x12e/0x2c0 [...] RIP: 0010:test_clear_page_writeback+0x12e/0x2c0 Call Trace: end_page_writeback+0x47/0x70 f2fs_write_end_io+0x76/0x180 [f2fs] bio_endio+0x9f/0x120 blk_update_request+0xa8/0x2f0 scsi_end_request+0x39/0x1d0 scsi_io_completion+0x211/0x690 scsi_finish_command+0xd9/0x120 scsi_softirq_done+0x127/0x150 __blk_mq_complete_request_remote+0x13/0x20 flush_smp_call_function_queue+0x56/0x110 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x27/0x40 call_function_single_interrupt+0x89/0x90 RIP: 0010:native_safe_halt+0x6/0x10 (gdb) l *(test_clear_page_writeback+0x12e) 0xffffffff811bae3e is in test_clear_page_writeback (./include/linux/memcontrol.h:619). 614 mod_node_page_state(page_pgdat(page), idx, val); 615 if (mem_cgroup_disabled() || !page->mem_cgroup) 616 return; 617 mod_memcg_state(page->mem_cgroup, idx, val); 618 pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; 619 this_cpu_add(pn->lruvec_stat->count[idx], val); 620 } 621 622 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, 623 gfp_t gfp_mask, The issue is that writeback doesn't hold a page reference and the page might get freed after PG_writeback is cleared (and the mapping is unlocked) in test_clear_page_writeback(). The stat functions looking up the page's node or zone are safe, as those attributes are static across allocation and free cycles. But page->mem_cgroup is not, and it will get cleared if we race with truncation or migration. It appears this race window has been around for a while, but less likely to trigger when the memcg stats were updated first thing after PG_writeback is cleared. Recent changes reshuffled this code to update the global node stats before the memcg ones, though, stretching the race window out to an extent where people can reproduce the problem. Update test_clear_page_writeback() to look up and pin page->mem_cgroup before clearing PG_writeback, then not use that pointer afterward. It is a partial revert of 62cccb8c8e7a ("mm: simplify lock_page_memcg()") but leaves the pageref-holding callsites that aren't affected alone. Link: http://lkml.kernel.org/r/20170809183825.GA26387@cmpxchg.org Fixes: 62cccb8c8e7a ("mm: simplify lock_page_memcg()") Signed-off-by: Johannes Weiner Reported-by: Jaegeuk Kim Tested-by: Jaegeuk Kim Reported-by: Bradley Bolen Tested-by: Brad Bolen Cc: Vladimir Davydov Cc: Michal Hocko Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3914e3dd6168..9b15a4bcfa77 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -void lock_page_memcg(struct page *page); +struct mem_cgroup *lock_page_memcg(struct page *page); +void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, @@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void lock_page_memcg(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) +{ + return NULL; +} + +static inline void __unlock_page_memcg(struct mem_cgroup *memcg) { } -- cgit From 8ada92799ec4de00f4bc0f10b1ededa256c1ab22 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:15:55 -0700 Subject: wait: add wait_event_killable_timeout() These are the few pending fixes I have queued up for v4.13-final. One is a a generic regression fix for recursive loops on kmod and the other one is a trivial print out correction. During the v4.13 development we assumed that recursive kmod loops were no longer possible. Clearly that is not true. The regression fix makes use of a new killable wait. We use a killable wait to be paranoid in how signals might be sent to modprobe and only accept a proper SIGKILL. The signal will only be available to userspace to issue *iff* a thread has already entered a wait state, and that happens only if we've already throttled after 50 kmod threads have been hit. Note that although it may seem excessive to trigger a failure afer 5 seconds if all kmod thread remain busy, prior to the series of changes that went into v4.13 we would actually *always* fatally fail any request which came in if the limit was already reached. The new waiting implemented in v4.13 actually gives us *more* breathing room -- the wait for 5 seconds is a wait for *any* kmod thread to finish. We give up and fail *iff* no kmod thread has finished and they're *all* running straight for 5 consecutive seconds. If 50 kmod threads are running consecutively for 5 seconds something else must be really bad. Recursive loops with kmod are bad but they're also hard to implement properly as a selftest without currently fooling current userspace tools like kmod [1]. For instance kmod will complain when you run depmod if it finds a recursive loop with symbol dependency between modules as such this type of recursive loop cannot go upstream as the modules_install target will fail after running depmod. These tests already exist on userspace kmod upstream though (refer to the testsuite/module-playground/mod-loop-*.c files). The same is not true if request_module() is used though, or worst if aliases are used. Likewise the issue with 64-bit kernels booting 32-bit userspace without a binfmt handler built-in is also currently not detected and proactively avoided by userspace kmod tools, or kconfig for all architectures. Although we could complain in the kernel when some of these individual recursive issues creep up, proactively avoiding these situations in userspace at build time is what we should keep striving for. Lastly, since recursive loops could happen with kmod it may mean recursive loops may also be possible with other kernel usermode helpers, this should be investigated and long term if we can come up with a more sensible generic solution even better! [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20170809-kmod-for-v4.13-final [1] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git This patch (of 3): This wait is similar to wait_event_interruptible_timeout() but only accepts SIGKILL interrupt signal. Other signals are ignored. Link: http://lkml.kernel.org/r/20170809234635.13443-2-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Kees Cook Cc: Dmitry Torokhov Cc: Jessica Yu Cc: Rusty Russell Cc: Michal Marek Cc: Petr Mladek Cc: Miroslav Benes Cc: Josh Poimboeuf Cc: "Eric W. Biederman" Cc: Shuah Khan Cc: Matt Redfearn Cc: Dan Carpenter Cc: Colin Ian King Cc: Daniel Mentz Cc: David Binderman Cc: Matt Redfearn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5b74e36c0ca8..dc19880c02f5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_killable_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_KILLABLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_KILLABLE) until the + * @condition evaluates to true or a kill signal is received. + * The @condition is checked each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a kill signal. + * + * Only kill signals interrupt this process. + */ +#define wait_event_killable_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_killable_timeout(wq_head, \ + condition, timeout); \ + __ret; \ +}) + #define __wait_event_lock_irq(wq_head, condition, lock, cmd) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ -- cgit From 3010f876500f9ba921afaeccec30c45ca6584dc8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 Aug 2017 15:16:05 -0700 Subject: mm: discard memblock data later There is existing use after free bug when deferred struct pages are enabled: The memblock_add() allocates memory for the memory array if more than 128 entries are needed. See comment in e820__memblock_setup(): * The bootstrap memblock region count maximum is 128 entries * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries * than that - so allow memblock resizing. This memblock memory is freed here: free_low_memory_core_early() We access the freed memblock.memory later in boot when deferred pages are initialized in this path: deferred_init_memmap() for_each_mem_pfn_range() __next_mem_pfn_range() type = &memblock.memory; One possible explanation for why this use-after-free hasn't been hit before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded at least on systems where deferred struct pages were enabled. Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128, and verifying in qemu that this code is getting excuted and that the freed pages are sane. Link: http://lkml.kernel.org/r/1502485554-318703-2-git-send-email-pasha.tatashin@oracle.com Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Acked-by: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 77d427974f57..bae11c7e7bf3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ extern int memblock_debug; #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata +void memblock_discard(void); #else #define __init_memblock #define __initdata_memblock @@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); -phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); @@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, phys_addr_t *out_end); +void __memblock_free_early(phys_addr_t base, phys_addr_t size); +void __memblock_free_late(phys_addr_t base, phys_addr_t size); + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. -- cgit From 6b31d5955cb29a51c5baffee382f213d75e98fb8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:15 -0700 Subject: mm, oom: fix potential data corruption when oom_reaper races with writer Wenwei Tao has noticed that our current assumption that the oom victim is dying and never doing any visible changes after it dies, and so the oom_reaper can tear it down, is not entirely true. __task_will_free_mem consider a task dying when SIGNAL_GROUP_EXIT is set but do_group_exit sends SIGKILL to all threads _after_ the flag is set. So there is a race window when some threads won't have fatal_signal_pending while the oom_reaper could start unmapping the address space. Moreover some paths might not check for fatal signals before each PF/g-u-p/copy_from_user. We already have a protection for oom_reaper vs. PF races by checking MMF_UNSTABLE. This has been, however, checked only for kernel threads (use_mm users) which can outlive the oom victim. A simple fix would be to extend the current check in handle_mm_fault for all tasks but that wouldn't be sufficient because the current check assumes that a kernel thread would bail out after EFAULT from get_user*/copy_from_user and never re-read the same address which would succeed because the PF path has established page tables already. This seems to be the case for the only existing use_mm user currently (virtio driver) but it is rather fragile in general. This is even more fragile in general for more complex paths such as generic_perform_write which can re-read the same address more times (e.g. iov_iter_copy_from_user_atomic to fail and then iov_iter_fault_in_readable on retry). Therefore we have to implement MMF_UNSTABLE protection in a robust way and never make a potentially corrupted content visible. That requires to hook deeper into the PF path and check for the flag _every time_ before a pte for anonymous memory is established (that means all !VM_SHARED mappings). The corruption can be triggered artificially (http://lkml.kernel.org/r/201708040646.v746kkhC024636@www262.sakura.ne.jp) but there doesn't seem to be any real life bug report. The race window should be quite tight to trigger most of the time. Link: http://lkml.kernel.org/r/20170807113839.16695-3-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Wenwei Tao Tested-by: Tetsuo Handa Cc: "Kirill A. Shutemov" Cc: Andrea Argangeli Cc: David Rientjes Cc: Oleg Nesterov Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/oom.h b/include/linux/oom.h index 8a266e2be5a6..76aac4ce39bc 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -6,6 +6,8 @@ #include #include #include +#include /* MMF_* */ +#include /* VM_FAULT* */ struct zonelist; struct notifier_block; @@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) return tsk->signal->oom_mm; } +/* + * Checks whether a page fault on the given mm is still reliable. + * This is no longer true if the oom reaper started to reap the + * address space which is reflected by MMF_UNSTABLE flag set in + * the mm. At that moment any !shared mapping would lose the content + * and could cause a memory corruption (zero pages instead of the + * original content). + * + * User should call this before establishing a page table entry for + * a !shared mapping and under the proper page table lock. + * + * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise. + */ +static inline int check_stable_address_space(struct mm_struct *mm) +{ + if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + return VM_FAULT_SIGBUS; + return 0; +} + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); -- cgit From 0888e372c37fa31882c8ed89fb2f8188b08b6718 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Thu, 17 Aug 2017 00:35:11 +0000 Subject: net: inet: diag: expose sockets cgroup classid This is useful for directly looking up a task based on class id rather than having to scan through all open file descriptors. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index bbe201047df6..678496897a68 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -142,6 +142,7 @@ enum { INET_DIAG_PAD, INET_DIAG_MARK, INET_DIAG_BBRINFO, + INET_DIAG_CLASS_ID, __INET_DIAG_MAX, }; -- cgit From 4c03bdd7b5c084c3c6973cb2419edac5363c051f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 17 Aug 2017 18:22:37 +0200 Subject: xdp: adjust xdp redirect tracepoint to include return error code The return error code need to be included in the tracepoint xdp:xdp_redirect, else its not possible to distinguish successful or failed XDP_REDIRECT transmits. XDP have no queuing mechanism. Thus, it is fairly easily to overrun a NIC transmit queue. The eBPF program invoking helpers (bpf_redirect or bpf_redirect_map) to redirect a packet doesn't get any feedback whether the packet was actually transmitted. Info on failed transmits in the tracepoint xdp:xdp_redirect, is interesting as this opens for providing a feedback-loop to the receiving XDP program. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 7b1eb7b4be41..0e42e69f773b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -53,15 +53,16 @@ TRACE_EVENT(xdp_redirect, TP_PROTO(const struct net_device *from, const struct net_device *to, - const struct bpf_prog *xdp, u32 act), + const struct bpf_prog *xdp, u32 act, int err), - TP_ARGS(from, to, xdp, act), + TP_ARGS(from, to, xdp, act, err), TP_STRUCT__entry( __string(name_from, from->name) __string(name_to, to->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, err) ), TP_fast_assign( @@ -70,12 +71,14 @@ TRACE_EVENT(xdp_redirect, __assign_str(name_from, from->name); __assign_str(name_to, to->name); __entry->act = act; + __entry->err = err; ), - TP_printk("prog=%s from=%s to=%s action=%s", + TP_printk("prog=%s from=%s to=%s action=%s err=%d", __print_hex_str(__entry->prog_tag, 8), __get_str(name_from), __get_str(name_to), - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->err) ); #endif /* _TRACE_XDP_H */ -- cgit From 606c07f3086017a8c2d7ce0843807e81b541edcc Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Fri, 18 Aug 2017 09:03:44 -0500 Subject: net: ethtool: Add macro to clear a link mode setting There are currently macros to set and test an ETHTOOL_LINK_MODE_ setting, but not to clear one. Add a macro to clear an ETHTOOL_LINK_MODE_ setting. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- include/linux/ethtool.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index afdbb701fdb4..4587a4c36923 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -136,6 +136,17 @@ struct ethtool_link_ksettings { #define ethtool_link_ksettings_add_link_mode(ptr, name, mode) \ __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) +/** + * ethtool_link_ksettings_del_link_mode - clear bit in link_ksettings + * link mode mask + * @ptr : pointer to struct ethtool_link_ksettings + * @name : one of supported/advertising/lp_advertising + * @mode : one of the ETHTOOL_LINK_MODE_*_BIT + * (not atomic, no bound checking) + */ +#define ethtool_link_ksettings_del_link_mode(ptr, name, mode) \ + __clear_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) + /** * ethtool_link_ksettings_test_link_mode - test bit in ksettings link mode mask * @ptr : pointer to struct ethtool_link_ksettings -- cgit From b793dc5c6edfb106fd57d12ad6aca64bf160b403 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:20 -0700 Subject: net: constify netdev_class_file These functions are wrapper arount class_create_file which can take a const attribute. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0f1c4cb2441e..eaa77bd9cb80 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4013,17 +4013,17 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi return rc; } -int netdev_class_create_file_ns(struct class_attribute *class_attr, +int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns); -void netdev_class_remove_file_ns(struct class_attribute *class_attr, +void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns); -static inline int netdev_class_create_file(struct class_attribute *class_attr) +static inline int netdev_class_create_file(const struct class_attribute *class_attr) { return netdev_class_create_file_ns(class_attr, NULL); } -static inline void netdev_class_remove_file(struct class_attribute *class_attr) +static inline void netdev_class_remove_file(const struct class_attribute *class_attr) { netdev_class_remove_file_ns(class_attr, NULL); } -- cgit From 737aec57c672c1308d602afecd841455c39561e5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:22 -0700 Subject: net: constify net_ns_type_operations This can be const. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eaa77bd9cb80..b0c928598dab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4028,7 +4028,7 @@ static inline void netdev_class_remove_file(const struct class_attribute *class_ netdev_class_remove_file_ns(class_attr, NULL); } -extern struct kobj_ns_type_operations net_ns_type_operations; +extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); -- cgit From 718ad681eff47d3d04879ff5f5290bdab0b8bad6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 18 Aug 2017 13:46:24 -0700 Subject: net: drop unused attribute argument from sysfs queue funcs The show and store functions don't need/use the attribute. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0c928598dab..c5475b37a631 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -694,10 +694,9 @@ struct netdev_rx_queue { */ struct rx_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, char *buf); + ssize_t (*show)(struct netdev_rx_queue *queue, char *buf); ssize_t (*store)(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, const char *buf, size_t len); + const char *buf, size_t len); }; #ifdef CONFIG_XPS -- cgit From cd030a78f7aa06fe216f6665a6ea84b8f3e5b3d3 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 15 Aug 2017 13:55:29 +0800 Subject: clk: sunxi-ng: support R40 SoC Allwinner R40 SoC have a clock controller module in the style of the SoCs beyond sun6i, however, it's more rich and complex. Add support for it. Signed-off-by: Icenowy Zheng Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/clock/sun8i-r40-ccu.h | 187 ++++++++++++++++++++++++++++++ include/dt-bindings/reset/sun8i-r40-ccu.h | 130 +++++++++++++++++++++ 2 files changed, 317 insertions(+) create mode 100644 include/dt-bindings/clock/sun8i-r40-ccu.h create mode 100644 include/dt-bindings/reset/sun8i-r40-ccu.h (limited to 'include') diff --git a/include/dt-bindings/clock/sun8i-r40-ccu.h b/include/dt-bindings/clock/sun8i-r40-ccu.h new file mode 100644 index 000000000000..4fa5f69fc297 --- /dev/null +++ b/include/dt-bindings/clock/sun8i-r40-ccu.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2017 Icenowy Zheng + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_CLK_SUN8I_R40_H_ +#define _DT_BINDINGS_CLK_SUN8I_R40_H_ + +#define CLK_CPU 24 + +#define CLK_BUS_MIPI_DSI 29 +#define CLK_BUS_CE 30 +#define CLK_BUS_DMA 31 +#define CLK_BUS_MMC0 32 +#define CLK_BUS_MMC1 33 +#define CLK_BUS_MMC2 34 +#define CLK_BUS_MMC3 35 +#define CLK_BUS_NAND 36 +#define CLK_BUS_DRAM 37 +#define CLK_BUS_EMAC 38 +#define CLK_BUS_TS 39 +#define CLK_BUS_HSTIMER 40 +#define CLK_BUS_SPI0 41 +#define CLK_BUS_SPI1 42 +#define CLK_BUS_SPI2 43 +#define CLK_BUS_SPI3 44 +#define CLK_BUS_SATA 45 +#define CLK_BUS_OTG 46 +#define CLK_BUS_EHCI0 47 +#define CLK_BUS_EHCI1 48 +#define CLK_BUS_EHCI2 49 +#define CLK_BUS_OHCI0 50 +#define CLK_BUS_OHCI1 51 +#define CLK_BUS_OHCI2 52 +#define CLK_BUS_VE 53 +#define CLK_BUS_MP 54 +#define CLK_BUS_DEINTERLACE 55 +#define CLK_BUS_CSI0 56 +#define CLK_BUS_CSI1 57 +#define CLK_BUS_HDMI1 58 +#define CLK_BUS_HDMI0 59 +#define CLK_BUS_DE 60 +#define CLK_BUS_TVE0 61 +#define CLK_BUS_TVE1 62 +#define CLK_BUS_TVE_TOP 63 +#define CLK_BUS_GMAC 64 +#define CLK_BUS_GPU 65 +#define CLK_BUS_TVD0 66 +#define CLK_BUS_TVD1 67 +#define CLK_BUS_TVD2 68 +#define CLK_BUS_TVD3 69 +#define CLK_BUS_TVD_TOP 70 +#define CLK_BUS_TCON_LCD0 71 +#define CLK_BUS_TCON_LCD1 72 +#define CLK_BUS_TCON_TV0 73 +#define CLK_BUS_TCON_TV1 74 +#define CLK_BUS_TCON_TOP 75 +#define CLK_BUS_CODEC 76 +#define CLK_BUS_SPDIF 77 +#define CLK_BUS_AC97 78 +#define CLK_BUS_PIO 79 +#define CLK_BUS_IR0 80 +#define CLK_BUS_IR1 81 +#define CLK_BUS_THS 82 +#define CLK_BUS_KEYPAD 83 +#define CLK_BUS_I2S0 84 +#define CLK_BUS_I2S1 85 +#define CLK_BUS_I2S2 86 +#define CLK_BUS_I2C0 87 +#define CLK_BUS_I2C1 88 +#define CLK_BUS_I2C2 89 +#define CLK_BUS_I2C3 90 +#define CLK_BUS_CAN 91 +#define CLK_BUS_SCR 92 +#define CLK_BUS_PS20 93 +#define CLK_BUS_PS21 94 +#define CLK_BUS_I2C4 95 +#define CLK_BUS_UART0 96 +#define CLK_BUS_UART1 97 +#define CLK_BUS_UART2 98 +#define CLK_BUS_UART3 99 +#define CLK_BUS_UART4 100 +#define CLK_BUS_UART5 101 +#define CLK_BUS_UART6 102 +#define CLK_BUS_UART7 103 +#define CLK_BUS_DBG 104 + +#define CLK_THS 105 +#define CLK_NAND 106 +#define CLK_MMC0 107 +#define CLK_MMC1 108 +#define CLK_MMC2 109 +#define CLK_MMC3 110 +#define CLK_TS 111 +#define CLK_CE 112 +#define CLK_SPI0 113 +#define CLK_SPI1 114 +#define CLK_SPI2 115 +#define CLK_SPI3 116 +#define CLK_I2S0 117 +#define CLK_I2S1 118 +#define CLK_I2S2 119 +#define CLK_AC97 120 +#define CLK_SPDIF 121 +#define CLK_KEYPAD 122 +#define CLK_SATA 123 +#define CLK_USB_PHY0 124 +#define CLK_USB_PHY1 125 +#define CLK_USB_PHY2 126 +#define CLK_USB_OHCI0 127 +#define CLK_USB_OHCI1 128 +#define CLK_USB_OHCI2 129 +#define CLK_IR0 130 +#define CLK_IR1 131 + +#define CLK_DRAM_VE 133 +#define CLK_DRAM_CSI0 134 +#define CLK_DRAM_CSI1 135 +#define CLK_DRAM_TS 136 +#define CLK_DRAM_TVD 137 +#define CLK_DRAM_MP 138 +#define CLK_DRAM_DEINTERLACE 139 +#define CLK_DE 140 +#define CLK_MP 141 +#define CLK_TCON_LCD0 142 +#define CLK_TCON_LCD1 143 +#define CLK_TCON_TV0 144 +#define CLK_TCON_TV1 145 +#define CLK_DEINTERLACE 146 +#define CLK_CSI1_MCLK 147 +#define CLK_CSI_SCLK 148 +#define CLK_CSI0_MCLK 149 +#define CLK_VE 150 +#define CLK_CODEC 151 +#define CLK_AVS 152 +#define CLK_HDMI 153 +#define CLK_HDMI_SLOW 154 + +#define CLK_DSI_DPHY 156 +#define CLK_TVE0 157 +#define CLK_TVE1 158 +#define CLK_TVD0 159 +#define CLK_TVD1 160 +#define CLK_TVD2 161 +#define CLK_TVD3 162 +#define CLK_GPU 163 +#define CLK_OUTA 164 +#define CLK_OUTB 165 + +#endif /* _DT_BINDINGS_CLK_SUN8I_R40_H_ */ diff --git a/include/dt-bindings/reset/sun8i-r40-ccu.h b/include/dt-bindings/reset/sun8i-r40-ccu.h new file mode 100644 index 000000000000..c5ebcf6672e4 --- /dev/null +++ b/include/dt-bindings/reset/sun8i-r40-ccu.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2017 Icenowy Zheng + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_RST_SUN8I_R40_H_ +#define _DT_BINDINGS_RST_SUN8I_R40_H_ + +#define RST_USB_PHY0 0 +#define RST_USB_PHY1 1 +#define RST_USB_PHY2 2 + +#define RST_DRAM 3 +#define RST_MBUS 4 + +#define RST_BUS_MIPI_DSI 5 +#define RST_BUS_CE 6 +#define RST_BUS_DMA 7 +#define RST_BUS_MMC0 8 +#define RST_BUS_MMC1 9 +#define RST_BUS_MMC2 10 +#define RST_BUS_MMC3 11 +#define RST_BUS_NAND 12 +#define RST_BUS_DRAM 13 +#define RST_BUS_EMAC 14 +#define RST_BUS_TS 15 +#define RST_BUS_HSTIMER 16 +#define RST_BUS_SPI0 17 +#define RST_BUS_SPI1 18 +#define RST_BUS_SPI2 19 +#define RST_BUS_SPI3 20 +#define RST_BUS_SATA 21 +#define RST_BUS_OTG 22 +#define RST_BUS_EHCI0 23 +#define RST_BUS_EHCI1 24 +#define RST_BUS_EHCI2 25 +#define RST_BUS_OHCI0 26 +#define RST_BUS_OHCI1 27 +#define RST_BUS_OHCI2 28 +#define RST_BUS_VE 29 +#define RST_BUS_MP 30 +#define RST_BUS_DEINTERLACE 31 +#define RST_BUS_CSI0 32 +#define RST_BUS_CSI1 33 +#define RST_BUS_HDMI0 34 +#define RST_BUS_HDMI1 35 +#define RST_BUS_DE 36 +#define RST_BUS_TVE0 37 +#define RST_BUS_TVE1 38 +#define RST_BUS_TVE_TOP 39 +#define RST_BUS_GMAC 40 +#define RST_BUS_GPU 41 +#define RST_BUS_TVD0 42 +#define RST_BUS_TVD1 43 +#define RST_BUS_TVD2 44 +#define RST_BUS_TVD3 45 +#define RST_BUS_TVD_TOP 46 +#define RST_BUS_TCON_LCD0 47 +#define RST_BUS_TCON_LCD1 48 +#define RST_BUS_TCON_TV0 49 +#define RST_BUS_TCON_TV1 50 +#define RST_BUS_TCON_TOP 51 +#define RST_BUS_DBG 52 +#define RST_BUS_LVDS 53 +#define RST_BUS_CODEC 54 +#define RST_BUS_SPDIF 55 +#define RST_BUS_AC97 56 +#define RST_BUS_IR0 57 +#define RST_BUS_IR1 58 +#define RST_BUS_THS 59 +#define RST_BUS_KEYPAD 60 +#define RST_BUS_I2S0 61 +#define RST_BUS_I2S1 62 +#define RST_BUS_I2S2 63 +#define RST_BUS_I2C0 64 +#define RST_BUS_I2C1 65 +#define RST_BUS_I2C2 66 +#define RST_BUS_I2C3 67 +#define RST_BUS_CAN 68 +#define RST_BUS_SCR 69 +#define RST_BUS_PS20 70 +#define RST_BUS_PS21 71 +#define RST_BUS_I2C4 72 +#define RST_BUS_UART0 73 +#define RST_BUS_UART1 74 +#define RST_BUS_UART2 75 +#define RST_BUS_UART3 76 +#define RST_BUS_UART4 77 +#define RST_BUS_UART5 78 +#define RST_BUS_UART6 79 +#define RST_BUS_UART7 80 + +#endif /* _DT_BINDINGS_RST_SUN8I_R40_H_ */ -- cgit From 99d1712bc41c7c9a5a473c104a4ad15427757b22 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:15:29 +0200 Subject: netfilter: exthdr: tcp option set support This allows setting 2 and 4 byte quantities in the tcp option space. Main purpose is to allow native replacement for xt_TCPMSS to work around pmtu blackholes. Writes to kind and len are now allowed at the moment, it does not seem useful to do this as it causes corruption of the tcp option space. We can always lift this restriction later if a use-case appears. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be25cf69295b..40fd199f7531 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -732,7 +732,8 @@ enum nft_exthdr_op { * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) - * @NFTA_EXTHDR_OP: option match type (NLA_U8) + * @NFTA_EXTHDR_OP: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: option match type (NLA_U32) */ enum nft_exthdr_attributes { NFTA_EXTHDR_UNSPEC, @@ -742,6 +743,7 @@ enum nft_exthdr_attributes { NFTA_EXTHDR_LEN, NFTA_EXTHDR_FLAGS, NFTA_EXTHDR_OP, + NFTA_EXTHDR_SREG, __NFTA_EXTHDR_MAX }; #define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) -- cgit From 6b5dc98e8fac041a3decfc3186e08c1c570ea691 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:48:04 +0200 Subject: netfilter: rt: add support to fetch path mss to be used in combination with tcp option set support to mimic iptables TCPMSS --clamp-mss-to-pmtu. v2: Eric Dumazet points out dst must be initialized. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 40fd199f7531..b49da72efa68 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -811,11 +811,13 @@ enum nft_meta_keys { * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + * @NFT_RT_TCPMSS: fetch current path tcp mss */ enum nft_rt_keys { NFT_RT_CLASSID, NFT_RT_NEXTHOP4, NFT_RT_NEXTHOP6, + NFT_RT_TCPMSS, }; /** -- cgit From 96eabe7a40aa17e613cf3db2c742ee8b1fc764d0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 18 Aug 2017 11:28:00 -0700 Subject: bpf: Allow selecting numa node during map creation The current map creation API does not allow to provide the numa-node preference. The memory usually comes from where the map-creation-process is running. The performance is not ideal if the bpf_prog is known to always run in a numa node different from the map-creation-process. One of the use case is sharding on CPU to different LRU maps (i.e. an array of LRU maps). Here is the test result of map_perf_test on the INNER_LRU_HASH_PREALLOC test if we force the lru map used by CPU0 to be allocated from a remote numa node: [ The machine has 20 cores. CPU0-9 at node 0. CPU10-19 at node 1 ] ># taskset -c 10 ./map_perf_test 512 8 1260000 8000000 5:inner_lru_hash_map_perf pre-alloc 1628380 events per sec 4:inner_lru_hash_map_perf pre-alloc 1626396 events per sec 3:inner_lru_hash_map_perf pre-alloc 1626144 events per sec 6:inner_lru_hash_map_perf pre-alloc 1621657 events per sec 2:inner_lru_hash_map_perf pre-alloc 1621534 events per sec 1:inner_lru_hash_map_perf pre-alloc 1620292 events per sec 7:inner_lru_hash_map_perf pre-alloc 1613305 events per sec 0:inner_lru_hash_map_perf pre-alloc 1239150 events per sec #<<< After specifying numa node: ># taskset -c 10 ./map_perf_test 512 8 1260000 8000000 5:inner_lru_hash_map_perf pre-alloc 1629627 events per sec 3:inner_lru_hash_map_perf pre-alloc 1628057 events per sec 1:inner_lru_hash_map_perf pre-alloc 1623054 events per sec 6:inner_lru_hash_map_perf pre-alloc 1616033 events per sec 2:inner_lru_hash_map_perf pre-alloc 1614630 events per sec 4:inner_lru_hash_map_perf pre-alloc 1612651 events per sec 7:inner_lru_hash_map_perf pre-alloc 1609337 events per sec 0:inner_lru_hash_map_perf pre-alloc 1619340 events per sec #<<< This patch adds one field, numa_node, to the bpf_attr. Since numa node 0 is a valid node, a new flag BPF_F_NUMA_NODE is also added. The numa_node field is honored if and only if the BPF_F_NUMA_NODE flag is set. Numa node selection is not supported for percpu map. This patch does not change all the kmalloc. F.e. 'htab = kzalloc()' is not changed since the object is small enough to stay in the cache. Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 +++++++++- include/uapi/linux/bpf.h | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1cc6c5ff61ec..55b88e329804 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -51,6 +51,7 @@ struct bpf_map { u32 map_flags; u32 pages; u32 id; + int numa_node; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -264,7 +265,7 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); -void *bpf_map_area_alloc(size_t size); +void *bpf_map_area_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; @@ -316,6 +317,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +/* Return map's numa specified by userspace */ +static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) +{ + return (attr->map_flags & BPF_F_NUMA_NODE) ? + attr->numa_node : NUMA_NO_NODE; +} + #else static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5ecbe812a2cc..843818dff96d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -165,6 +165,7 @@ enum bpf_attach_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +/* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list @@ -173,6 +174,8 @@ enum bpf_attach_type { * across different LRU lists. */ #define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -180,8 +183,13 @@ union bpf_attr { __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ - __u32 map_flags; /* prealloc or not */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit From d6e1e46f69fbe956e877cdd00dbfb002baddf577 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 19 Aug 2017 23:34:03 -0700 Subject: bpf: linux/bpf.h needs linux/numa.h Reported-by: kbuild test robot Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 55b88e329804..830f472d8df5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,6 +14,7 @@ #include #include #include +#include struct perf_event; struct bpf_prog; -- cgit From 5405fa26c25e18ab735daddc46c8a7a78f138f70 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 15 Jun 2017 18:29:23 +0300 Subject: net/mlx5: Add PCIe outbound stalls counters infrastructure Add capability bit in MCAM register and counters to MPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c99daffc3c3c..ba533b39c885 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1854,7 +1854,17 @@ struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { u8 crc_error_tlp[0x20]; - u8 reserved_at_140[0x680]; + u8 reserved_at_140[0x40]; + + u8 outbound_stalled_reads[0x20]; + + u8 outbound_stalled_writes[0x20]; + + u8 outbound_stalled_reads_events[0x20]; + + u8 outbound_stalled_writes_events[0x20]; + + u8 reserved_at_200[0x5c0]; }; struct mlx5_ifc_cmd_inter_comp_event_bits { @@ -7744,8 +7754,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x7d]; - + u8 reserved_at_0[0x7b]; + u8 pcie_outbound_stalled[0x1]; + u8 reserved_at_7c[0x1]; u8 mtpps_enh_out_per_adj[0x1]; u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; -- cgit From 2dba07975acbc30c77a22d38030ee5a86ab8a748 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 18 Jun 2017 14:56:45 +0300 Subject: net/mlx5: Add RX buffer fullness counters infrastructure Add capability bit in PCAM register and counters to PPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ba533b39c885..cf7ff52c594e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1538,7 +1538,17 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits { u8 port_transmit_wait_low[0x20]; - u8 reserved_at_40[0x780]; + u8 reserved_at_40[0x100]; + + u8 rx_buffer_almost_full_high[0x20]; + + u8 rx_buffer_almost_full_low[0x20]; + + u8 rx_buffer_full_high[0x20]; + + u8 rx_buffer_full_low[0x20]; + + u8 reserved_at_1c0[0x600]; }; struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits { @@ -7723,8 +7733,9 @@ struct mlx5_ifc_peir_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x7c]; + u8 reserved_at_0[0x7b]; + u8 rx_buffer_fullness_counters[0x1]; u8 ptys_connector_type[0x1]; u8 reserved_at_7d[0x1]; u8 ppcnt_discard_group[0x1]; -- cgit From efae7f78c45ba37bdc23a95d219b59ac85bdd0a7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Fri, 12 May 2017 02:47:02 +0300 Subject: net/mlx5e: Add outbound PCI buffer overflow counter Add outbound_pci_buffer_overflow to ethtool output for monitoring the number of packets that were dropped due to lack of PCIe buffers on receive path from NIC port toward the host(s). This counter is valid only in case that tx_overflow_buffer_pkt is supported in MCAM enhanced features. Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cf7ff52c594e..ae7d09b9c52f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1864,7 +1864,9 @@ struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { u8 crc_error_tlp[0x20]; - u8 reserved_at_140[0x40]; + u8 tx_overflow_buffer_pkt_high[0x20]; + + u8 tx_overflow_buffer_pkt_low[0x20]; u8 outbound_stalled_reads[0x20]; @@ -7767,7 +7769,7 @@ struct mlx5_ifc_pcam_reg_bits { struct mlx5_ifc_mcam_enhanced_features_bits { u8 reserved_at_0[0x7b]; u8 pcie_outbound_stalled[0x1]; - u8 reserved_at_7c[0x1]; + u8 tx_overflow_buffer_pkt[0x1]; u8 mtpps_enh_out_per_adj[0x1]; u8 mtpps_fs[0x1]; u8 pcie_performance_group[0x1]; -- cgit From 67c672ecd3a535f00efea18baac81983a8956084 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 12 Aug 2017 05:57:05 -0400 Subject: media: v4l2-ctrls.h: better document the arguments for v4l2_ctrl_fill The arguments for this function are pointers. Make it clear at its documentation. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-ctrls.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 2d2aed56922f..dacfe54057f8 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -340,17 +340,17 @@ struct v4l2_ctrl_config { * v4l2_ctrl_fill - Fill in the control fields based on the control ID. * * @id: ID of the control - * @name: name of the control - * @type: type of the control - * @min: minimum value for the control - * @max: maximum value for the control - * @step: control step - * @def: default value for the control - * @flags: flags to be used on the control + * @name: pointer to be filled with a string with the name of the control + * @type: pointer for storing the type of the control + * @min: pointer for storing the minimum value for the control + * @max: pointer for storing the maximum value for the control + * @step: pointer for storing the control step + * @def: pointer for storing the default value for the control + * @flags: pointer for storing the flags to be used on the control * * This works for all standard V4L2 controls. * For non-standard controls it will only fill in the given arguments - * and @name will be %NULL. + * and @name content will be set to %NULL. * * This function will overwrite the contents of @name, @type and @flags. * The contents of @min, @max, @step and @def may be modified depending on -- cgit From 9a6b2a87405a5022660022722d4a830b768e8033 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 15 Aug 2017 15:26:25 -0400 Subject: media: cec: rename pin events/function The CEC_EVENT_PIN_LOW/HIGH defines and the cec_queue_pin_event() function did not specify that these were about CEC pin events. Since in the future there will also be HPD pin events it is wise to rename the event defines and function to CEC_EVENT_PIN_CEC_LOW/HIGH and cec_queue_pin_cec_event() now before these become part of the ABI. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 9 +++++---- include/uapi/linux/cec.h | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 1bec7bde4792..224359c9941a 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -91,7 +91,7 @@ struct cec_event_entry { }; #define CEC_NUM_CORE_EVENTS 2 -#define CEC_NUM_EVENTS CEC_EVENT_PIN_HIGH +#define CEC_NUM_EVENTS CEC_EVENT_PIN_CEC_HIGH struct cec_fh { struct list_head list; @@ -280,14 +280,15 @@ static inline void cec_received_msg(struct cec_adapter *adap, } /** - * cec_queue_pin_event() - queue a pin event with a given timestamp. + * cec_queue_pin_cec_event() - queue a CEC pin event with a given timestamp. * * @adap: pointer to the cec adapter - * @is_high: when true the pin is high, otherwise it is low + * @is_high: when true the CEC pin is high, otherwise it is low * @ts: the timestamp for this event * */ -void cec_queue_pin_event(struct cec_adapter *adap, bool is_high, ktime_t ts); +void cec_queue_pin_cec_event(struct cec_adapter *adap, + bool is_high, ktime_t ts); /** * cec_get_edid_phys_addr() - find and return the physical address diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index d87a67b0bb06..4351c3481aea 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -408,8 +408,8 @@ struct cec_log_addrs { * didn't empty the message queue in time */ #define CEC_EVENT_LOST_MSGS 2 -#define CEC_EVENT_PIN_LOW 3 -#define CEC_EVENT_PIN_HIGH 4 +#define CEC_EVENT_PIN_CEC_LOW 3 +#define CEC_EVENT_PIN_CEC_HIGH 4 #define CEC_EVENT_FL_INITIAL_STATE (1 << 0) #define CEC_EVENT_FL_DROPPED_EVENTS (1 << 1) -- cgit From cb7474949371ba8d7591a62924f05b627a2601d9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 16 Aug 2017 03:13:02 -0400 Subject: media: cec-pin: fix irq handling The free_irq() function could be called from interrupt context, which is invalid. Move this to the thread. In the interrupt handler we just request that the thread disables the irq. This is done through an atomic so we don't need to add any spinlocks. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec-pin.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/cec-pin.h b/include/media/cec-pin.h index 44b82d29d480..d28d07fa312e 100644 --- a/include/media/cec-pin.h +++ b/include/media/cec-pin.h @@ -113,6 +113,10 @@ struct cec_pin_ops { #define CEC_NUM_PIN_EVENTS 128 +#define CEC_PIN_IRQ_UNCHANGED 0 +#define CEC_PIN_IRQ_DISABLE 1 +#define CEC_PIN_IRQ_ENABLE 2 + struct cec_pin { struct cec_adapter *adap; const struct cec_pin_ops *ops; @@ -137,8 +141,8 @@ struct cec_pin { struct cec_msg work_rx_msg; u8 work_tx_status; - bool work_enable_irq; ktime_t work_tx_ts; + atomic_t work_irq_change; atomic_t work_pin_events; unsigned int work_pin_events_wr; unsigned int work_pin_events_rd; -- cgit From 68d9c47b1679ec8d55a005d39fc7a958ece82095 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 21 Jul 2017 15:28:33 -0400 Subject: media: Convert to using %pOF instead of full_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. Signed-off-by: Rob Herring Acked-by: Niklas Söderlund Acked-by: Laurent Pinchart Reviewed-by: Matthias Brugger Acked-by: Nicolas Ferre Acked-by: Lad, Prabhakar Cc: Kyungmin Park Cc: Andrzej Hajda Cc: Mauro Carvalho Chehab Cc: Songjun Wu Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: Javier Martinez Canillas Cc: Minghsiu Tsai Cc: Houlong Wei Cc: Andrew-CT Chen Cc: Guennadi Liakhovetski Cc: Hyun Kwon Cc: Michal Simek Cc: "Sören Brinkmann" Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-mediatek@lists.infradead.org Cc: linux-renesas-soc@vger.kernel.org Reviewed-by: Sylwester Nawrocki Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-clk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-clk.h b/include/media/v4l2-clk.h index 2b94662d005c..7ec857f805a6 100644 --- a/include/media/v4l2-clk.h +++ b/include/media/v4l2-clk.h @@ -70,7 +70,7 @@ static inline struct v4l2_clk *v4l2_clk_register_fixed(const char *dev_id, #define v4l2_clk_name_i2c(name, size, adap, client) snprintf(name, size, \ "%d-%04x", adap, client) -#define v4l2_clk_name_of(name, size, of_full_name) snprintf(name, size, \ - "of-%s", of_full_name) +#define v4l2_clk_name_of(name, size, node) snprintf(name, size, \ + "of-%pOF", node) #endif -- cgit From 518f4b26be1ebf6ce220c4e37b5c7e5410c4d064 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 1 Jul 2017 12:13:19 -0400 Subject: media: rc-core: rename input_name to device_name When an ir-spi is registered, you get this message. rc rc0: Unspecified device as /devices/platform/soc/3f215080.spi/spi_master/spi32766/spi32766.128/rc/rc0 "Unspecified device" refers to input_name, which makes no sense for IR TX only devices. So, rename to device_name. Also make driver_name const char* so that no casts are needed anywhere. Now ir-spi reports: rc rc0: IR SPI as /devices/platform/soc/3f215080.spi/spi_master/spi32766/spi32766.128/rc/rc0 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 2 +- include/media/rc-core.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index 224359c9941a..d97aa6c32abd 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -203,7 +203,7 @@ struct cec_adapter { u16 phys_addrs[15]; u32 sequence; - char input_name[32]; + char device_name[32]; char input_phys[32]; char input_drv[32]; }; diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 78dea39a9b39..16bd89caa22d 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -72,7 +72,7 @@ enum rc_filter_type { * @dev: driver model's view of this device * @managed_alloc: devm_rc_allocate_device was used to create rc_dev * @sysfs_groups: sysfs attribute groups - * @input_name: name of the input child device + * @device_name: name of the rc child device * @input_phys: physical path to the input child device * @input_id: id of the input child device (struct input_id) * @driver_name: name of the hardware driver which registered this device @@ -138,10 +138,10 @@ struct rc_dev { struct device dev; bool managed_alloc; const struct attribute_group *sysfs_groups[5]; - const char *input_name; + const char *device_name; const char *input_phys; struct input_id input_id; - char *driver_name; + const char *driver_name; const char *map_name; struct rc_map rc_map; struct mutex lock; -- cgit From e8ffda78623677f7935b30901a173405b4861bda Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 30 Jul 2017 09:23:11 -0400 Subject: media: rc: ir-nec-decoder: move scancode composing code into a shared function The NEC scancode composing and protocol type detection in ir_nec_decode() is generic enough to be a shared function. Let's create an inline function in rc-core.h, so that other remote control drivers can reuse this function to save some code. Signed-off-by: Shawn Guo Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-core.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 16bd89caa22d..b6c18840d125 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -340,4 +340,35 @@ static inline u32 ir_extract_bits(u32 data, u32 mask) return value; } +/* Get NEC scancode and protocol type from address and command bytes */ +static inline u32 ir_nec_bytes_to_scancode(u8 address, u8 not_address, + u8 command, u8 not_command, + enum rc_type *protocol) +{ + u32 scancode; + + if ((command ^ not_command) != 0xff) { + /* NEC transport, but modified protocol, used by at + * least Apple and TiVo remotes + */ + scancode = not_address << 24 | + address << 16 | + not_command << 8 | + command; + *protocol = RC_TYPE_NEC32; + } else if ((address ^ not_address) != 0xff) { + /* Extended NEC */ + scancode = address << 16 | + not_address << 8 | + command; + *protocol = RC_TYPE_NECX; + } else { + /* Normal NEC */ + scancode = address << 8 | command; + *protocol = RC_TYPE_NEC; + } + + return scancode; +} + #endif /* _RC_CORE */ -- cgit From b429996ced6f599b20ffc79789a54c65a21b0d96 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 30 Jul 2017 09:23:13 -0400 Subject: media: rc: add zx-irdec remote control driver It adds the remote control driver and corresponding keymap file for IRDEC block found on ZTE ZX family SoCs. Signed-off-by: Shawn Guo Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/rc-map.h b/include/media/rc-map.h index 1a815a572fa1..c69482852f29 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -313,6 +313,7 @@ struct rc_map *rc_map_get(const char *name); #define RC_MAP_WINFAST "rc-winfast" #define RC_MAP_WINFAST_USBII_DELUXE "rc-winfast-usbii-deluxe" #define RC_MAP_SU3000 "rc-su3000" +#define RC_MAP_ZX_IRDEC "rc-zx-irdec" /* * Please, do not just append newer Remote Controller names at the end. -- cgit From 86fe1ac0d563477b1d10d49a92237e3f3d74e7be Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 7 Aug 2017 08:38:10 -0400 Subject: media: rc: simplify ir_raw_event_store_edge() Since commit 12749b198fa4 ("[media] rc: saa7134: add trailing space for timely decoding"), the workaround of inserting reset events is no longer needed. Note that the initial reset is not needed either; other rc-core drivers that don't use ir_raw_event_store_edge() never call this at all. Verified on a HVR-1150 and Raspberry Pi. Fixes: 3f5c4c73322e ("[media] rc: fix ghost keypresses with certain hw") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/rc-core.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/media/rc-core.h b/include/media/rc-core.h index b6c18840d125..5be527ff851d 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -272,14 +272,6 @@ u32 rc_g_keycode_from_table(struct rc_dev *dev, u32 scancode); * The Raw interface is specific to InfraRed. It may be a good idea to * split it later into a separate header. */ - -enum raw_event_type { - IR_SPACE = (1 << 0), - IR_PULSE = (1 << 1), - IR_START_EVENT = (1 << 2), - IR_STOP_EVENT = (1 << 3), -}; - struct ir_raw_event { union { u32 duration; @@ -308,7 +300,7 @@ static inline void init_ir_raw_event(struct ir_raw_event *ev) void ir_raw_event_handle(struct rc_dev *dev); int ir_raw_event_store(struct rc_dev *dev, struct ir_raw_event *ev); -int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type); +int ir_raw_event_store_edge(struct rc_dev *dev, bool pulse); int ir_raw_event_store_with_filter(struct rc_dev *dev, struct ir_raw_event *ev); void ir_raw_event_set_idle(struct rc_dev *dev, bool idle); -- cgit From a9a249a2c997506a64eaee22f1458fda893f62a8 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 7 Aug 2017 09:31:24 -0400 Subject: media: cec: fix remote control passthrough The 'Press and Hold' operation was not correctly implemented, in particular the requirement that the repeat doesn't start until the second identical keypress arrives. The REP_DELAY value also had to be adjusted (see the comment in the code) to achieve the desired behavior. The 'enabled_protocols' field was also never set, fix that too. Since CEC is a fixed protocol the driver has to set this field. Signed-off-by: Hans Verkuil Acked-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/media/cec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/media/cec.h b/include/media/cec.h index d97aa6c32abd..60b26fc18464 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -190,6 +190,11 @@ struct cec_adapter { u32 tx_timeouts; +#ifdef CONFIG_MEDIA_CEC_RC + bool rc_repeating; + int rc_last_scancode; + u64 rc_last_keypress; +#endif #ifdef CONFIG_CEC_NOTIFIER struct cec_notifier *notifier; #endif -- cgit From 6d741bfed5ed06ed42a16d30f1ed7afdcaf7f092 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 7 Aug 2017 16:20:58 -0400 Subject: media: rc: rename RC_TYPE_* to RC_PROTO_* and RC_BIT_* to RC_PROTO_BIT_* RC_TYPE is confusing and it's just the protocol. So rename it. Suggested-by: Hans Verkuil Signed-off-by: Sean Young Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/i2c/ir-kbd-i2c.h | 8 +- include/media/rc-core.h | 33 ++++--- include/media/rc-map.h | 215 +++++++++++++++++++++-------------------- 3 files changed, 135 insertions(+), 121 deletions(-) (limited to 'include') diff --git a/include/media/i2c/ir-kbd-i2c.h b/include/media/i2c/ir-kbd-i2c.h index d8564354debb..ac8c55617a79 100644 --- a/include/media/i2c/ir-kbd-i2c.h +++ b/include/media/i2c/ir-kbd-i2c.h @@ -20,7 +20,8 @@ struct IR_i2c { struct delayed_work work; char name[32]; char phys[32]; - int (*get_key)(struct IR_i2c *ir, enum rc_type *protocol, + int (*get_key)(struct IR_i2c *ir, + enum rc_proto *protocol, u32 *scancode, u8 *toggle); }; @@ -38,14 +39,15 @@ enum ir_kbd_get_key_fn { struct IR_i2c_init_data { char *ir_codes; const char *name; - u64 type; /* RC_BIT_RC5, etc */ + u64 type; /* RC_PROTO_BIT_RC5, etc */ u32 polling_interval; /* 0 means DEFAULT_POLLING_INTERVAL */ /* * Specify either a function pointer or a value indicating one of * ir_kbd_i2c's internal get_key functions */ - int (*get_key)(struct IR_i2c *ir, enum rc_type *protocol, + int (*get_key)(struct IR_i2c *ir, + enum rc_proto *protocol, u32 *scancode, u8 *toggle); enum ir_kbd_get_key_fn internal_get_key_func; diff --git a/include/media/rc-core.h b/include/media/rc-core.h index 5be527ff851d..314a1edb6189 100644 --- a/include/media/rc-core.h +++ b/include/media/rc-core.h @@ -87,11 +87,12 @@ enum rc_filter_type { * @idle: used to keep track of RX state * @encode_wakeup: wakeup filtering uses IR encode API, therefore the allowed * wakeup protocols is the set of all raw encoders - * @allowed_protocols: bitmask with the supported RC_BIT_* protocols - * @enabled_protocols: bitmask with the enabled RC_BIT_* protocols - * @allowed_wakeup_protocols: bitmask with the supported RC_BIT_* wakeup protocols - * @wakeup_protocol: the enabled RC_TYPE_* wakeup protocol or - * RC_TYPE_UNKNOWN if disabled. + * @allowed_protocols: bitmask with the supported RC_PROTO_BIT_* protocols + * @enabled_protocols: bitmask with the enabled RC_PROTO_BIT_* protocols + * @allowed_wakeup_protocols: bitmask with the supported RC_PROTO_BIT_* wakeup + * protocols + * @wakeup_protocol: the enabled RC_PROTO_* wakeup protocol or + * RC_PROTO_UNKNOWN if disabled. * @scancode_filter: scancode filter * @scancode_wakeup_filter: scancode wakeup filters * @scancode_mask: some hardware decoders are not capable of providing the full @@ -154,7 +155,7 @@ struct rc_dev { u64 allowed_protocols; u64 enabled_protocols; u64 allowed_wakeup_protocols; - enum rc_type wakeup_protocol; + enum rc_proto wakeup_protocol; struct rc_scancode_filter scancode_filter; struct rc_scancode_filter scancode_wakeup_filter; u32 scancode_mask; @@ -165,7 +166,7 @@ struct rc_dev { unsigned long keyup_jiffies; struct timer_list timer_keyup; u32 last_keycode; - enum rc_type last_protocol; + enum rc_proto last_protocol; u32 last_scancode; u8 last_toggle; u32 timeout; @@ -173,7 +174,7 @@ struct rc_dev { u32 max_timeout; u32 rx_resolution; u32 tx_resolution; - int (*change_protocol)(struct rc_dev *dev, u64 *rc_type); + int (*change_protocol)(struct rc_dev *dev, u64 *rc_proto); int (*open)(struct rc_dev *dev); void (*close)(struct rc_dev *dev); int (*s_tx_mask)(struct rc_dev *dev, u32 mask); @@ -262,8 +263,10 @@ int rc_open(struct rc_dev *rdev); void rc_close(struct rc_dev *rdev); void rc_repeat(struct rc_dev *dev); -void rc_keydown(struct rc_dev *dev, enum rc_type protocol, u32 scancode, u8 toggle); -void rc_keydown_notimeout(struct rc_dev *dev, enum rc_type protocol, u32 scancode, u8 toggle); +void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u32 scancode, + u8 toggle); +void rc_keydown_notimeout(struct rc_dev *dev, enum rc_proto protocol, + u32 scancode, u8 toggle); void rc_keyup(struct rc_dev *dev); u32 rc_g_keycode_from_table(struct rc_dev *dev, u32 scancode); @@ -304,7 +307,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, bool pulse); int ir_raw_event_store_with_filter(struct rc_dev *dev, struct ir_raw_event *ev); void ir_raw_event_set_idle(struct rc_dev *dev, bool idle); -int ir_raw_encode_scancode(enum rc_type protocol, u32 scancode, +int ir_raw_encode_scancode(enum rc_proto protocol, u32 scancode, struct ir_raw_event *events, unsigned int max); static inline void ir_raw_event_reset(struct rc_dev *dev) @@ -335,7 +338,7 @@ static inline u32 ir_extract_bits(u32 data, u32 mask) /* Get NEC scancode and protocol type from address and command bytes */ static inline u32 ir_nec_bytes_to_scancode(u8 address, u8 not_address, u8 command, u8 not_command, - enum rc_type *protocol) + enum rc_proto *protocol) { u32 scancode; @@ -347,17 +350,17 @@ static inline u32 ir_nec_bytes_to_scancode(u8 address, u8 not_address, address << 16 | not_command << 8 | command; - *protocol = RC_TYPE_NEC32; + *protocol = RC_PROTO_NEC32; } else if ((address ^ not_address) != 0xff) { /* Extended NEC */ scancode = address << 16 | not_address << 8 | command; - *protocol = RC_TYPE_NECX; + *protocol = RC_PROTO_NECX; } else { /* Normal NEC */ scancode = address << 8 | command; - *protocol = RC_TYPE_NEC; + *protocol = RC_PROTO_NEC; } return scancode; diff --git a/include/media/rc-map.h b/include/media/rc-map.h index c69482852f29..2a160e6e823c 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -12,113 +12,122 @@ #include /** - * enum rc_type - type of the Remote Controller protocol + * enum rc_proto - the Remote Controller protocol * - * @RC_TYPE_UNKNOWN: Protocol not known - * @RC_TYPE_OTHER: Protocol known but proprietary - * @RC_TYPE_RC5: Philips RC5 protocol - * @RC_TYPE_RC5X_20: Philips RC5x 20 bit protocol - * @RC_TYPE_RC5_SZ: StreamZap variant of RC5 - * @RC_TYPE_JVC: JVC protocol - * @RC_TYPE_SONY12: Sony 12 bit protocol - * @RC_TYPE_SONY15: Sony 15 bit protocol - * @RC_TYPE_SONY20: Sony 20 bit protocol - * @RC_TYPE_NEC: NEC protocol - * @RC_TYPE_NECX: Extended NEC protocol - * @RC_TYPE_NEC32: NEC 32 bit protocol - * @RC_TYPE_SANYO: Sanyo protocol - * @RC_TYPE_MCIR2_KBD: RC6-ish MCE keyboard - * @RC_TYPE_MCIR2_MSE: RC6-ish MCE mouse - * @RC_TYPE_RC6_0: Philips RC6-0-16 protocol - * @RC_TYPE_RC6_6A_20: Philips RC6-6A-20 protocol - * @RC_TYPE_RC6_6A_24: Philips RC6-6A-24 protocol - * @RC_TYPE_RC6_6A_32: Philips RC6-6A-32 protocol - * @RC_TYPE_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol - * @RC_TYPE_SHARP: Sharp protocol - * @RC_TYPE_XMP: XMP protocol - * @RC_TYPE_CEC: CEC protocol + * @RC_PROTO_UNKNOWN: Protocol not known + * @RC_PROTO_OTHER: Protocol known but proprietary + * @RC_PROTO_RC5: Philips RC5 protocol + * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol + * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 + * @RC_PROTO_JVC: JVC protocol + * @RC_PROTO_SONY12: Sony 12 bit protocol + * @RC_PROTO_SONY15: Sony 15 bit protocol + * @RC_PROTO_SONY20: Sony 20 bit protocol + * @RC_PROTO_NEC: NEC protocol + * @RC_PROTO_NECX: Extended NEC protocol + * @RC_PROTO_NEC32: NEC 32 bit protocol + * @RC_PROTO_SANYO: Sanyo protocol + * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard + * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse + * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol + * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol + * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol + * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol + * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol + * @RC_PROTO_SHARP: Sharp protocol + * @RC_PROTO_XMP: XMP protocol + * @RC_PROTO_CEC: CEC protocol */ -enum rc_type { - RC_TYPE_UNKNOWN = 0, - RC_TYPE_OTHER = 1, - RC_TYPE_RC5 = 2, - RC_TYPE_RC5X_20 = 3, - RC_TYPE_RC5_SZ = 4, - RC_TYPE_JVC = 5, - RC_TYPE_SONY12 = 6, - RC_TYPE_SONY15 = 7, - RC_TYPE_SONY20 = 8, - RC_TYPE_NEC = 9, - RC_TYPE_NECX = 10, - RC_TYPE_NEC32 = 11, - RC_TYPE_SANYO = 12, - RC_TYPE_MCIR2_KBD = 13, - RC_TYPE_MCIR2_MSE = 14, - RC_TYPE_RC6_0 = 15, - RC_TYPE_RC6_6A_20 = 16, - RC_TYPE_RC6_6A_24 = 17, - RC_TYPE_RC6_6A_32 = 18, - RC_TYPE_RC6_MCE = 19, - RC_TYPE_SHARP = 20, - RC_TYPE_XMP = 21, - RC_TYPE_CEC = 22, +enum rc_proto { + RC_PROTO_UNKNOWN = 0, + RC_PROTO_OTHER = 1, + RC_PROTO_RC5 = 2, + RC_PROTO_RC5X_20 = 3, + RC_PROTO_RC5_SZ = 4, + RC_PROTO_JVC = 5, + RC_PROTO_SONY12 = 6, + RC_PROTO_SONY15 = 7, + RC_PROTO_SONY20 = 8, + RC_PROTO_NEC = 9, + RC_PROTO_NECX = 10, + RC_PROTO_NEC32 = 11, + RC_PROTO_SANYO = 12, + RC_PROTO_MCIR2_KBD = 13, + RC_PROTO_MCIR2_MSE = 14, + RC_PROTO_RC6_0 = 15, + RC_PROTO_RC6_6A_20 = 16, + RC_PROTO_RC6_6A_24 = 17, + RC_PROTO_RC6_6A_32 = 18, + RC_PROTO_RC6_MCE = 19, + RC_PROTO_SHARP = 20, + RC_PROTO_XMP = 21, + RC_PROTO_CEC = 22, }; -#define RC_BIT_NONE 0ULL -#define RC_BIT_UNKNOWN BIT_ULL(RC_TYPE_UNKNOWN) -#define RC_BIT_OTHER BIT_ULL(RC_TYPE_OTHER) -#define RC_BIT_RC5 BIT_ULL(RC_TYPE_RC5) -#define RC_BIT_RC5X_20 BIT_ULL(RC_TYPE_RC5X_20) -#define RC_BIT_RC5_SZ BIT_ULL(RC_TYPE_RC5_SZ) -#define RC_BIT_JVC BIT_ULL(RC_TYPE_JVC) -#define RC_BIT_SONY12 BIT_ULL(RC_TYPE_SONY12) -#define RC_BIT_SONY15 BIT_ULL(RC_TYPE_SONY15) -#define RC_BIT_SONY20 BIT_ULL(RC_TYPE_SONY20) -#define RC_BIT_NEC BIT_ULL(RC_TYPE_NEC) -#define RC_BIT_NECX BIT_ULL(RC_TYPE_NECX) -#define RC_BIT_NEC32 BIT_ULL(RC_TYPE_NEC32) -#define RC_BIT_SANYO BIT_ULL(RC_TYPE_SANYO) -#define RC_BIT_MCIR2_KBD BIT_ULL(RC_TYPE_MCIR2_KBD) -#define RC_BIT_MCIR2_MSE BIT_ULL(RC_TYPE_MCIR2_MSE) -#define RC_BIT_RC6_0 BIT_ULL(RC_TYPE_RC6_0) -#define RC_BIT_RC6_6A_20 BIT_ULL(RC_TYPE_RC6_6A_20) -#define RC_BIT_RC6_6A_24 BIT_ULL(RC_TYPE_RC6_6A_24) -#define RC_BIT_RC6_6A_32 BIT_ULL(RC_TYPE_RC6_6A_32) -#define RC_BIT_RC6_MCE BIT_ULL(RC_TYPE_RC6_MCE) -#define RC_BIT_SHARP BIT_ULL(RC_TYPE_SHARP) -#define RC_BIT_XMP BIT_ULL(RC_TYPE_XMP) -#define RC_BIT_CEC BIT_ULL(RC_TYPE_CEC) +#define RC_PROTO_BIT_NONE 0ULL +#define RC_PROTO_BIT_UNKNOWN BIT_ULL(RC_PROTO_UNKNOWN) +#define RC_PROTO_BIT_OTHER BIT_ULL(RC_PROTO_OTHER) +#define RC_PROTO_BIT_RC5 BIT_ULL(RC_PROTO_RC5) +#define RC_PROTO_BIT_RC5X_20 BIT_ULL(RC_PROTO_RC5X_20) +#define RC_PROTO_BIT_RC5_SZ BIT_ULL(RC_PROTO_RC5_SZ) +#define RC_PROTO_BIT_JVC BIT_ULL(RC_PROTO_JVC) +#define RC_PROTO_BIT_SONY12 BIT_ULL(RC_PROTO_SONY12) +#define RC_PROTO_BIT_SONY15 BIT_ULL(RC_PROTO_SONY15) +#define RC_PROTO_BIT_SONY20 BIT_ULL(RC_PROTO_SONY20) +#define RC_PROTO_BIT_NEC BIT_ULL(RC_PROTO_NEC) +#define RC_PROTO_BIT_NECX BIT_ULL(RC_PROTO_NECX) +#define RC_PROTO_BIT_NEC32 BIT_ULL(RC_PROTO_NEC32) +#define RC_PROTO_BIT_SANYO BIT_ULL(RC_PROTO_SANYO) +#define RC_PROTO_BIT_MCIR2_KBD BIT_ULL(RC_PROTO_MCIR2_KBD) +#define RC_PROTO_BIT_MCIR2_MSE BIT_ULL(RC_PROTO_MCIR2_MSE) +#define RC_PROTO_BIT_RC6_0 BIT_ULL(RC_PROTO_RC6_0) +#define RC_PROTO_BIT_RC6_6A_20 BIT_ULL(RC_PROTO_RC6_6A_20) +#define RC_PROTO_BIT_RC6_6A_24 BIT_ULL(RC_PROTO_RC6_6A_24) +#define RC_PROTO_BIT_RC6_6A_32 BIT_ULL(RC_PROTO_RC6_6A_32) +#define RC_PROTO_BIT_RC6_MCE BIT_ULL(RC_PROTO_RC6_MCE) +#define RC_PROTO_BIT_SHARP BIT_ULL(RC_PROTO_SHARP) +#define RC_PROTO_BIT_XMP BIT_ULL(RC_PROTO_XMP) +#define RC_PROTO_BIT_CEC BIT_ULL(RC_PROTO_CEC) -#define RC_BIT_ALL (RC_BIT_UNKNOWN | RC_BIT_OTHER | \ - RC_BIT_RC5 | RC_BIT_RC5X_20 | RC_BIT_RC5_SZ | \ - RC_BIT_JVC | \ - RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ - RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | \ - RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ - RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ - RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | RC_BIT_SHARP | \ - RC_BIT_XMP | RC_BIT_CEC) +#define RC_PROTO_BIT_ALL \ + (RC_PROTO_BIT_UNKNOWN | RC_PROTO_BIT_OTHER | \ + RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \ + RC_PROTO_BIT_RC5_SZ | RC_PROTO_BIT_JVC | \ + RC_PROTO_BIT_SONY12 | RC_PROTO_BIT_SONY15 | \ + RC_PROTO_BIT_SONY20 | RC_PROTO_BIT_NEC | \ + RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | \ + RC_PROTO_BIT_SANYO | \ + RC_PROTO_BIT_MCIR2_KBD | RC_PROTO_BIT_MCIR2_MSE | \ + RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \ + RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | \ + RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SHARP | \ + RC_PROTO_BIT_XMP | RC_PROTO_BIT_CEC) /* All rc protocols for which we have decoders */ -#define RC_BIT_ALL_IR_DECODER \ - (RC_BIT_RC5 | RC_BIT_RC5X_20 | RC_BIT_RC5_SZ | \ - RC_BIT_JVC | \ - RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ - RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ - RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ - RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | RC_BIT_SHARP | \ - RC_BIT_XMP) +#define RC_PROTO_BIT_ALL_IR_DECODER \ + (RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \ + RC_PROTO_BIT_RC5_SZ | RC_PROTO_BIT_JVC | \ + RC_PROTO_BIT_SONY12 | RC_PROTO_BIT_SONY15 | \ + RC_PROTO_BIT_SONY20 | RC_PROTO_BIT_NEC | \ + RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | \ + RC_PROTO_BIT_SANYO | RC_PROTO_BIT_MCIR2_KBD | \ + RC_PROTO_BIT_MCIR2_MSE | \ + RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \ + RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | \ + RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SHARP | \ + RC_PROTO_BIT_XMP) -#define RC_BIT_ALL_IR_ENCODER \ - (RC_BIT_RC5 | RC_BIT_RC5X_20 | RC_BIT_RC5_SZ | \ - RC_BIT_JVC | \ - RC_BIT_SONY12 | RC_BIT_SONY15 | RC_BIT_SONY20 | \ - RC_BIT_NEC | RC_BIT_NECX | RC_BIT_NEC32 | \ - RC_BIT_SANYO | RC_BIT_MCIR2_KBD | RC_BIT_MCIR2_MSE | \ - RC_BIT_RC6_0 | RC_BIT_RC6_6A_20 | RC_BIT_RC6_6A_24 | \ - RC_BIT_RC6_6A_32 | RC_BIT_RC6_MCE | \ - RC_BIT_SHARP) +#define RC_PROTO_BIT_ALL_IR_ENCODER \ + (RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \ + RC_PROTO_BIT_RC5_SZ | RC_PROTO_BIT_JVC | \ + RC_PROTO_BIT_SONY12 | RC_PROTO_BIT_SONY15 | \ + RC_PROTO_BIT_SONY20 | RC_PROTO_BIT_NEC | \ + RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | \ + RC_PROTO_BIT_SANYO | RC_PROTO_BIT_MCIR2_KBD | \ + RC_PROTO_BIT_MCIR2_MSE | \ + RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \ + RC_PROTO_BIT_RC6_6A_24 | \ + RC_PROTO_BIT_RC6_6A_32 | RC_PROTO_BIT_RC6_MCE | \ + RC_PROTO_BIT_SHARP) #define RC_SCANCODE_UNKNOWN(x) (x) #define RC_SCANCODE_OTHER(x) (x) @@ -148,8 +157,8 @@ struct rc_map_table { * @size: Max number of entries * @len: Number of entries that are in use * @alloc: size of \*scan, in bytes - * @rc_type: type of the remote controller protocol, as defined at - * enum &rc_type + * @rc_proto: type of the remote controller protocol, as defined at + * enum &rc_proto * @name: name of the key map table * @lock: lock to protect access to this structure */ @@ -158,7 +167,7 @@ struct rc_map { unsigned int size; unsigned int len; unsigned int alloc; - enum rc_type rc_type; + enum rc_proto rc_proto; const char *name; spinlock_t lock; }; -- cgit From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:14:42 -0600 Subject: PATCH] iio: Fix some documentation warnings The kerneldoc description for the trig_readonly field of struct iio_dev lacked a colon, leading to this doc build warning: ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly' A similar issue for iio_trigger_set_immutable() in trigger.h yielded: ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev' ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig' Fix the formatting and silence the warnings. Signed-off-by: Jonathan Corbet Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..c380daa40c0e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -535,7 +535,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) - * @trig_readonly [INTERN] mark the current trigger immutable + * @trig_readonly: [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..7142d8d6e470 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev, /** * iio_trigger_set_immutable() - set an immutable trigger on destination * - * @indio_dev - IIO device structure containing the device - * @trig - trigger to assign to device + * @indio_dev: IIO device structure containing the device + * @trig: trigger to assign to device * **/ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); -- cgit From 3bde7afdabe9f37974af806abe646c2ca43c67c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Aug 2017 11:33:25 -0400 Subject: NFS: Remove unused parameter gfp_flags from nfs_pageio_init() Now that the mirror allocation has been moved, the parameter can go. Also remove the redundant symbol export. Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d67b67ae6c8b..8b1a35aad0c3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -125,8 +125,7 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int how, - gfp_t gfp_flags); + int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, -- cgit From 68a66d149a8c78ec6720f268597302883e48e9fa Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 19 Aug 2017 15:37:07 +0300 Subject: net_sched: fix order of queue length updates in qdisc_replace() This important to call qdisc_tree_reduce_backlog() after changing queue length. Parent qdisc should deactivate class in ->qlen_notify() called from qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen in zero. Missed class deactivations leads to crashes/warnings at picking packets from empty qdisc and corrupting state at reactivating this class in future. Signed-off-by: Konstantin Khlebnikov Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper") Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1c123e2b2415..67f815e5d525 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -806,8 +806,11 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) { - qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); + unsigned int qlen = old->q.qlen; + unsigned int backlog = old->qstats.backlog; + qdisc_reset(old); + qdisc_tree_reduce_backlog(old, qlen, backlog); } sch_tree_unlock(sch); -- cgit From 9d6105e19f617406aea46dd19281080c7c5ae0d8 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Mon, 21 Aug 2017 03:28:34 +0200 Subject: mfd: rk808: Fix up the chip id get failed the rk8xx chip id is: ((MSB << 8) | LSB) & 0xfff0 Signed-off-by: Elaine Zhang Signed-off-by: Joseph Chen Signed-off-by: Heiko Stuebner Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 83701ef7d3c7..54feb140c210 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -298,6 +298,7 @@ enum rk818_reg { #define VOUT_LO_INT BIT(0) #define CLK32KOUT2_EN BIT(0) +#define RK8XX_ID_MSK 0xfff0 enum { BUCK_ILMIN_50MA, BUCK_ILMIN_100MA, -- cgit From a5247ca6708ff7dc089d2774c131455774153eb6 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Mon, 21 Aug 2017 03:28:35 +0200 Subject: mfd: rk808: Add rk805 regs addr and ID Signed-off-by: Elaine Zhang Signed-off-by: Joseph Chen Signed-off-by: Heiko Stuebner Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 120 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 54feb140c210..d3156594674c 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -206,6 +206,97 @@ enum rk818_reg { #define RK818_USB_ILMIN_2000MA 0x7 #define RK818_USB_CHG_SD_VSEL_MASK 0x70 +/* RK805 */ +enum rk805_reg { + RK805_ID_DCDC1, + RK805_ID_DCDC2, + RK805_ID_DCDC3, + RK805_ID_DCDC4, + RK805_ID_LDO1, + RK805_ID_LDO2, + RK805_ID_LDO3, +}; + +/* CONFIG REGISTER */ +#define RK805_VB_MON_REG 0x21 +#define RK805_THERMAL_REG 0x22 + +/* POWER CHANNELS ENABLE REGISTER */ +#define RK805_DCDC_EN_REG 0x23 +#define RK805_SLP_DCDC_EN_REG 0x25 +#define RK805_SLP_LDO_EN_REG 0x26 +#define RK805_LDO_EN_REG 0x27 + +/* BUCK AND LDO CONFIG REGISTER */ +#define RK805_BUCK_LDO_SLP_LP_EN_REG 0x2A +#define RK805_BUCK1_CONFIG_REG 0x2E +#define RK805_BUCK1_ON_VSEL_REG 0x2F +#define RK805_BUCK1_SLP_VSEL_REG 0x30 +#define RK805_BUCK2_CONFIG_REG 0x32 +#define RK805_BUCK2_ON_VSEL_REG 0x33 +#define RK805_BUCK2_SLP_VSEL_REG 0x34 +#define RK805_BUCK3_CONFIG_REG 0x36 +#define RK805_BUCK4_CONFIG_REG 0x37 +#define RK805_BUCK4_ON_VSEL_REG 0x38 +#define RK805_BUCK4_SLP_VSEL_REG 0x39 +#define RK805_LDO1_ON_VSEL_REG 0x3B +#define RK805_LDO1_SLP_VSEL_REG 0x3C +#define RK805_LDO2_ON_VSEL_REG 0x3D +#define RK805_LDO2_SLP_VSEL_REG 0x3E +#define RK805_LDO3_ON_VSEL_REG 0x3F +#define RK805_LDO3_SLP_VSEL_REG 0x40 + +/* INTERRUPT REGISTER */ +#define RK805_PWRON_LP_INT_TIME_REG 0x47 +#define RK805_PWRON_DB_REG 0x48 +#define RK805_DEV_CTRL_REG 0x4B +#define RK805_INT_STS_REG 0x4C +#define RK805_INT_STS_MSK_REG 0x4D +#define RK805_GPIO_IO_POL_REG 0x50 +#define RK805_OUT_REG 0x52 +#define RK805_ON_SOURCE_REG 0xAE +#define RK805_OFF_SOURCE_REG 0xAF + +#define RK805_NUM_REGULATORS 7 + +#define RK805_PWRON_FALL_RISE_INT_EN 0x0 +#define RK805_PWRON_FALL_RISE_INT_MSK 0x81 + +/* RK805 IRQ Definitions */ +#define RK805_IRQ_PWRON_RISE 0 +#define RK805_IRQ_VB_LOW 1 +#define RK805_IRQ_PWRON 2 +#define RK805_IRQ_PWRON_LP 3 +#define RK805_IRQ_HOTDIE 4 +#define RK805_IRQ_RTC_ALARM 5 +#define RK805_IRQ_RTC_PERIOD 6 +#define RK805_IRQ_PWRON_FALL 7 + +#define RK805_IRQ_PWRON_RISE_MSK BIT(0) +#define RK805_IRQ_VB_LOW_MSK BIT(1) +#define RK805_IRQ_PWRON_MSK BIT(2) +#define RK805_IRQ_PWRON_LP_MSK BIT(3) +#define RK805_IRQ_HOTDIE_MSK BIT(4) +#define RK805_IRQ_RTC_ALARM_MSK BIT(5) +#define RK805_IRQ_RTC_PERIOD_MSK BIT(6) +#define RK805_IRQ_PWRON_FALL_MSK BIT(7) + +#define RK805_PWR_RISE_INT_STATUS BIT(0) +#define RK805_VB_LOW_INT_STATUS BIT(1) +#define RK805_PWRON_INT_STATUS BIT(2) +#define RK805_PWRON_LP_INT_STATUS BIT(3) +#define RK805_HOTDIE_INT_STATUS BIT(4) +#define RK805_ALARM_INT_STATUS BIT(5) +#define RK805_PERIOD_INT_STATUS BIT(6) +#define RK805_PWR_FALL_INT_STATUS BIT(7) + +#define RK805_BUCK1_2_ILMAX_MASK (3 << 6) +#define RK805_BUCK3_4_ILMAX_MASK (3 << 3) +#define RK805_RTC_PERIOD_INT_MASK (1 << 6) +#define RK805_RTC_ALARM_INT_MASK (1 << 5) +#define RK805_INT_ALARM_EN (1 << 3) +#define RK805_INT_TIMER_EN (1 << 2) + /* RK808 IRQ Definitions */ #define RK808_IRQ_VOUT_LO 0 #define RK808_IRQ_VB_LO 1 @@ -298,7 +389,14 @@ enum rk818_reg { #define VOUT_LO_INT BIT(0) #define CLK32KOUT2_EN BIT(0) +#define TEMP115C 0x0c +#define TEMP_HOTDIE_MSK 0x0c +#define SLP_SD_MSK (0x3 << 2) +#define SHUTDOWN_FUN (0x2 << 2) +#define SLEEP_FUN (0x1 << 2) #define RK8XX_ID_MSK 0xfff0 +#define FPWM_MODE BIT(7) + enum { BUCK_ILMIN_50MA, BUCK_ILMIN_100MA, @@ -322,6 +420,28 @@ enum { }; enum { + RK805_BUCK1_2_ILMAX_2500MA, + RK805_BUCK1_2_ILMAX_3000MA, + RK805_BUCK1_2_ILMAX_3500MA, + RK805_BUCK1_2_ILMAX_4000MA, +}; + +enum { + RK805_BUCK3_ILMAX_1500MA, + RK805_BUCK3_ILMAX_2000MA, + RK805_BUCK3_ILMAX_2500MA, + RK805_BUCK3_ILMAX_3000MA, +}; + +enum { + RK805_BUCK4_ILMAX_2000MA, + RK805_BUCK4_ILMAX_2500MA, + RK805_BUCK4_ILMAX_3000MA, + RK805_BUCK4_ILMAX_3500MA, +}; + +enum { + RK805_ID = 0x8050, RK808_ID = 0x0000, RK818_ID = 0x8181, }; -- cgit From 6c83fd5142c68294acb0e857b7bac2ce8a5077f7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 21 Aug 2017 14:06:46 +0200 Subject: quota: Add lock annotations to struct members Add annotation which lock protects which struct members to struct dquot and struct mem_dqinfo. Signed-off-by: Jan Kara --- include/linux/quota.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/quota.h b/include/linux/quota.h index 074123975595..5ac9de4fcd6f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -223,12 +223,12 @@ struct mem_dqinfo { struct quota_format_type *dqi_format; int dqi_fmt_id; /* Id of the dqi_format - used when turning * quotas on after remount RW */ - struct list_head dqi_dirty_list; /* List of dirty dquots */ - unsigned long dqi_flags; - unsigned int dqi_bgrace; - unsigned int dqi_igrace; - qsize_t dqi_max_spc_limit; - qsize_t dqi_max_ino_limit; + struct list_head dqi_dirty_list; /* List of dirty dquots [dq_list_lock] */ + unsigned long dqi_flags; /* DFQ_ flags [dq_data_lock] */ + unsigned int dqi_bgrace; /* Space grace time [dq_data_lock] */ + unsigned int dqi_igrace; /* Inode grace time [dq_data_lock] */ + qsize_t dqi_max_spc_limit; /* Maximum space limit [static] */ + qsize_t dqi_max_ino_limit; /* Maximum inode limit [static] */ void *dqi_priv; }; @@ -293,16 +293,16 @@ static inline void dqstats_dec(unsigned int type) * clear them when it sees fit. */ struct dquot { - struct hlist_node dq_hash; /* Hash list in memory */ - struct list_head dq_inuse; /* List of all quotas */ - struct list_head dq_free; /* Free list element */ - struct list_head dq_dirty; /* List of dirty dquots */ + struct hlist_node dq_hash; /* Hash list in memory [dq_list_lock] */ + struct list_head dq_inuse; /* List of all quotas [dq_list_lock] */ + struct list_head dq_free; /* Free list element [dq_list_lock] */ + struct list_head dq_dirty; /* List of dirty dquots [dq_list_lock] */ struct mutex dq_lock; /* dquot IO lock */ spinlock_t dq_dqb_lock; /* Lock protecting dq_dqb changes */ atomic_t dq_count; /* Use count */ struct super_block *dq_sb; /* superblock this applies to */ struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ - loff_t dq_off; /* Offset of dquot on disk */ + loff_t dq_off; /* Offset of dquot on disk [dq_lock, stable once set] */ unsigned long dq_flags; /* See DQ_* */ struct mem_dqblk dq_dqb; /* Diskquota usage [dq_dqb_lock] */ }; -- cgit From 7467c9d9598993d8531b2f76c090a5743000612b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 24 Jul 2017 14:55:13 +0100 Subject: of: return of_get_cpu_node from of_cpu_device_node_get if CPUs are not registered Instead of the callsites choosing between of_cpu_device_node_get if the CPUs are registered as of_node is populated by then and of_get_cpu_node when the CPUs are not yet registered as CPU of_nodes are not yet stashed thereby needing to parse the device tree, we can call of_get_cpu_node in case the CPUs are not yet registered. This will allow to use of_cpu_device_node_get anywhere hiding the details from the caller. Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Sudeep Holla Signed-off-by: Rob Herring --- include/linux/of_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b4ad8b4f8506..611502524425 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -50,7 +50,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) struct device *cpu_dev; cpu_dev = get_cpu_device(cpu); if (!cpu_dev) - return NULL; + return of_get_cpu_node(cpu, NULL); return of_node_get(cpu_dev->of_node); } -- cgit From 89e49506bc62520f93e64a278293444319a6aebb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 17 Aug 2017 16:47:00 +0200 Subject: dsa: remove unused net_device arg from handlers compile tested only, but saw no warnings/errors with allmodconfig build. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/dsa.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7f46b521313e..398ca8d70ccd 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -104,8 +104,7 @@ struct packet_type; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); }; @@ -134,8 +133,7 @@ struct dsa_switch_tree { /* Copy of tag_ops->rcv for faster access in hot path */ struct sk_buff * (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); + struct packet_type *pt); /* * The switch port to which the CPU is attached. -- cgit From dd1c1f2f2028a7b851f701fc6a8ebe39dcb95e7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2017 17:35:02 +0200 Subject: pids: make task_tgid_nr_ns() safe This was reported many times, and this was even mentioned in commit 52ee2dfdd4f5 ("pids: refactor vnr/nr_ns helpers to make them safe") but somehow nobody bothered to fix the obvious problem: task_tgid_nr_ns() is not safe because task->group_leader points to nowhere after the exiting task passes exit_notify(), rcu_read_lock() can not help. We really need to change __unhash_process() to nullify group_leader, parent, and real_parent, but this needs some cleanups. Until then we can turn task_tgid_nr_ns() into another user of __task_pid_nr_ns() and fix the problem. Reported-by: Troy Kensinger Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/pid.h | 4 +++- include/linux/sched.h | 51 +++++++++++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 4d179316e431..719582744a2e 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -8,7 +8,9 @@ enum pid_type PIDTYPE_PID, PIDTYPE_PGID, PIDTYPE_SID, - PIDTYPE_MAX + PIDTYPE_MAX, + /* only valid to __task_pid_nr_ns() */ + __PIDTYPE_TGID }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 8337e2db0bb2..c05ac5f5aa03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,13 +1163,6 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); - -static inline pid_t task_tgid_vnr(struct task_struct *tsk) -{ - return pid_vnr(task_tgid(tsk)); -} - /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. @@ -1185,23 +1178,6 @@ static inline int pid_alive(const struct task_struct *p) return p->pids[PIDTYPE_PID].pid != NULL; } -static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) -{ - pid_t pid = 0; - - rcu_read_lock(); - if (pid_alive(tsk)) - pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); - rcu_read_unlock(); - - return pid; -} - -static inline pid_t task_ppid_nr(const struct task_struct *tsk) -{ - return task_ppid_nr_ns(tsk, &init_pid_ns); -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); @@ -1223,6 +1199,33 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns); +} + +static inline pid_t task_tgid_vnr(struct task_struct *tsk) +{ + return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL); +} + +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + /* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { -- cgit From 7d3f0cd43feea1636dd7746f22fe8249b34d1b79 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 18 Aug 2017 15:23:24 +0800 Subject: net: sched: Add the invalid handle check in qdisc_class_find Add the invalid handle "0" check to avoid unnecessary search, because the qdisc uses the skb->priority as the handle value to look up, and it is "0" usually. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5865db91976b..107c52432245 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -393,6 +393,9 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id) struct Qdisc_class_common *cl; unsigned int h; + if (!id) + return NULL; + h = qdisc_class_hash(id, hash->hashmask); hlist_for_each_entry(cl, &hash->hash[h], hnode) { if (cl->classid == id) -- cgit From 9762e7ff163213e18ed8e7cc723d74b54bf9cb98 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Fri, 18 Aug 2017 11:49:24 +0800 Subject: clk: rockchip: add rk3228 sclk_sdio_src ID This patch exports sdio src clock for dts reference. Signed-off-by: Elaine Zhang Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3228-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3228-cru.h b/include/dt-bindings/clock/rk3228-cru.h index 56f841c22801..55655ab0a4c4 100644 --- a/include/dt-bindings/clock/rk3228-cru.h +++ b/include/dt-bindings/clock/rk3228-cru.h @@ -49,6 +49,7 @@ #define SCLK_EMMC_DRV 117 #define SCLK_SDMMC_SAMPLE 118 #define SCLK_SDIO_SAMPLE 119 +#define SCLK_SDIO_SRC 120 #define SCLK_EMMC_SAMPLE 121 #define SCLK_VOP 122 #define SCLK_HDMI_HDCP 123 -- cgit From 1858698e0ac63f4fd3a633064893b5fd6ef8aa8d Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Mon, 21 Aug 2017 16:16:04 +0800 Subject: clk: rockchip: add rv1108 ACLK_GAMC and PCLK_GMAC ID This patch exports gmac aclk and pclk for dts reference. Signed-off-by: Elaine Zhang Reviewed-by: David Wu Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rv1108-cru.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h index f269d833e41a..2239ae2a19b9 100644 --- a/include/dt-bindings/clock/rv1108-cru.h +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -110,6 +110,7 @@ #define ACLK_CIF2 207 #define ACLK_CIF3 208 #define ACLK_PERI 209 +#define ACLK_GMAC 210 /* pclk gates */ #define PCLK_GPIO1 256 @@ -141,6 +142,7 @@ #define PCLK_EFUSE0 282 #define PCLK_EFUSE1 283 #define PCLK_WDT 284 +#define PCLK_GMAC 285 /* hclk gates */ #define HCLK_I2S0_8CH 320 -- cgit From c56f16dab0dfc8a37bc6133f2c2a02ffb873648b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 11 Aug 2017 18:00:15 +0800 Subject: f2fs: add tracepoint for f2fs_gc This patch adds tracepoint for f2fs_gc. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 107 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index db5c98256147..8955e75dd48e 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -569,6 +569,113 @@ TRACE_EVENT(f2fs_background_gc, __entry->free) ); +TRACE_EVENT(f2fs_gc_begin, + + TP_PROTO(struct super_block *sb, bool sync, bool background, + long long dirty_nodes, long long dirty_dents, + long long dirty_imeta, unsigned int free_sec, + unsigned int free_seg, int reserved_seg, + unsigned int prefree_seg), + + TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta, + free_sec, free_seg, reserved_seg, prefree_seg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(bool, sync) + __field(bool, background) + __field(long long, dirty_nodes) + __field(long long, dirty_dents) + __field(long long, dirty_imeta) + __field(unsigned int, free_sec) + __field(unsigned int, free_seg) + __field(int, reserved_seg) + __field(unsigned int, prefree_seg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->sync = sync; + __entry->background = background; + __entry->dirty_nodes = dirty_nodes; + __entry->dirty_dents = dirty_dents; + __entry->dirty_imeta = dirty_imeta; + __entry->free_sec = free_sec; + __entry->free_seg = free_seg; + __entry->reserved_seg = reserved_seg; + __entry->prefree_seg = prefree_seg; + ), + + TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, " + "dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, " + "rsv_seg:%d, prefree_seg:%u", + show_dev(__entry->dev), + __entry->sync, + __entry->background, + __entry->dirty_nodes, + __entry->dirty_dents, + __entry->dirty_imeta, + __entry->free_sec, + __entry->free_seg, + __entry->reserved_seg, + __entry->prefree_seg) +); + +TRACE_EVENT(f2fs_gc_end, + + TP_PROTO(struct super_block *sb, int ret, int seg_freed, + int sec_freed, long long dirty_nodes, + long long dirty_dents, long long dirty_imeta, + unsigned int free_sec, unsigned int free_seg, + int reserved_seg, unsigned int prefree_seg), + + TP_ARGS(sb, ret, seg_freed, sec_freed, dirty_nodes, dirty_dents, + dirty_imeta, free_sec, free_seg, reserved_seg, prefree_seg), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, ret) + __field(int, seg_freed) + __field(int, sec_freed) + __field(long long, dirty_nodes) + __field(long long, dirty_dents) + __field(long long, dirty_imeta) + __field(unsigned int, free_sec) + __field(unsigned int, free_seg) + __field(int, reserved_seg) + __field(unsigned int, prefree_seg) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->ret = ret; + __entry->seg_freed = seg_freed; + __entry->sec_freed = sec_freed; + __entry->dirty_nodes = dirty_nodes; + __entry->dirty_dents = dirty_dents; + __entry->dirty_imeta = dirty_imeta; + __entry->free_sec = free_sec; + __entry->free_seg = free_seg; + __entry->reserved_seg = reserved_seg; + __entry->prefree_seg = prefree_seg; + ), + + TP_printk("dev = (%d,%d), ret = %d, seg_freed = %d, sec_freed = %d, " + "nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, " + "free_seg:%u, rsv_seg:%d, prefree_seg:%u", + show_dev(__entry->dev), + __entry->ret, + __entry->seg_freed, + __entry->sec_freed, + __entry->dirty_nodes, + __entry->dirty_dents, + __entry->dirty_imeta, + __entry->free_sec, + __entry->free_seg, + __entry->reserved_seg, + __entry->prefree_seg) +); + TRACE_EVENT(f2fs_get_victim, TP_PROTO(struct super_block *sb, int type, int gc_type, -- cgit From c7d0045b08a36c2fb7874efc48d747613c6a1ccf Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Mon, 21 Aug 2017 16:16:06 +0800 Subject: clk: rockchip: rename rv1108 macphy clock to mac This MAC has no internal phy for rv1108 and the whole clock infrastructure hasn't been used yet, so is safe to fix. Signed-off-by: Elaine Zhang Reviewed-by: David Wu Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rv1108-cru.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rv1108-cru.h b/include/dt-bindings/clock/rv1108-cru.h index 2239ae2a19b9..d8d0e0456dc2 100644 --- a/include/dt-bindings/clock/rv1108-cru.h +++ b/include/dt-bindings/clock/rv1108-cru.h @@ -67,9 +67,9 @@ #define SCLK_SPI 108 #define SCLK_SARADC 109 #define SCLK_TSADC 110 -#define SCLK_MACPHY_PRE 111 -#define SCLK_MACPHY 112 -#define SCLK_MACPHY_RX 113 +#define SCLK_MAC_PRE 111 +#define SCLK_MAC 112 +#define SCLK_MAC_RX 113 #define SCLK_MAC_REF 114 #define SCLK_MAC_REFOUT 115 #define SCLK_DSP_PFM 116 -- cgit From c678fa66341c7b82a57cfed0ba3656162e970f99 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 21 Aug 2017 10:23:13 -0700 Subject: dmaengine: remove DMA_SG as it is dead code in kernel There are no in kernel consumers for DMA_SG op. Removing operation, dead code, and test code in dmatest. Signed-off-by: Dave Jiang Reviewed-by: Linus Walleij Cc: Gary Hook Cc: Ludovic Desroches Cc: Kedareswara rao Appana Cc: Li Yang Cc: Michal Simek Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 533680860865..64fbd380c430 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -68,7 +68,6 @@ enum dma_transaction_type { DMA_MEMSET, DMA_MEMSET_SG, DMA_INTERRUPT, - DMA_SG, DMA_PRIVATE, DMA_ASYNC_TX, DMA_SLAVE, @@ -771,11 +770,6 @@ struct dma_device { unsigned int nents, int value, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_sg)( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_slave_sg)( struct dma_chan *chan, struct scatterlist *sgl, @@ -905,19 +899,6 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy( len, flags); } -static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( - struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long flags) -{ - if (!chan || !chan->device || !chan->device->device_prep_dma_sg) - return NULL; - - return chan->device->device_prep_dma_sg(chan, dst_sg, dst_nents, - src_sg, src_nents, flags); -} - /** * dmaengine_terminate_all() - Terminate all active DMA transfers * @chan: The channel for which to terminate the transfers -- cgit From 6f7473c524cc4f875dcd9397ec9a6ec039bd08b6 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 10 Aug 2017 14:53:52 +0200 Subject: crypto: hash - add crypto_(un)register_ahashes() There are already helpers to (un)register multiple normal and AEAD algos. Add one for ahashes too. Signed-off-by: Lars Persson Signed-off-by: Rabin Vincent Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f6d9af3efa45..f0b44c16e88f 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -76,6 +76,8 @@ static inline int crypto_ahash_walk_last(struct crypto_hash_walk *walk) int crypto_register_ahash(struct ahash_alg *alg); int crypto_unregister_ahash(struct ahash_alg *alg); +int crypto_register_ahashes(struct ahash_alg *algs, int count); +void crypto_unregister_ahashes(struct ahash_alg *algs, int count); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); -- cgit From 67dfabe3a01be6b0cc4c0056aa546f0279ed6279 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 21 Aug 2017 16:54:01 +0200 Subject: mfd: da9052: Add register details for TSI Add register details an channels definition for using the TSI registers in the hwmon driver. Signed-off-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 6 ++++++ include/linux/mfd/da9052/reg.h | 11 ++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index ce9230af09c2..ae5b663836d0 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -45,6 +45,12 @@ #define DA9052_ADC_TJUNC 8 #define DA9052_ADC_VBBAT 9 +/* TSI channel has its own 4 channel mux */ +#define DA9052_ADC_TSI_XP 70 +#define DA9052_ADC_TSI_XN 71 +#define DA9052_ADC_TSI_YP 72 +#define DA9052_ADC_TSI_YN 73 + #define DA9052_IRQ_DCIN 0 #define DA9052_IRQ_VBUS 1 #define DA9052_IRQ_DCINREM 2 diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h index 5010f978725c..76780ea8849c 100644 --- a/include/linux/mfd/da9052/reg.h +++ b/include/linux/mfd/da9052/reg.h @@ -690,7 +690,10 @@ /* TSI CONTROL REGISTER B BITS */ #define DA9052_TSICONTB_ADCREF 0X80 #define DA9052_TSICONTB_TSIMAN 0X40 -#define DA9052_TSICONTB_TSIMUX 0X30 +#define DA9052_TSICONTB_TSIMUX_XP 0X00 +#define DA9052_TSICONTB_TSIMUX_YP 0X10 +#define DA9052_TSICONTB_TSIMUX_XN 0X20 +#define DA9052_TSICONTB_TSIMUX_YN 0X30 #define DA9052_TSICONTB_TSISEL3 0X08 #define DA9052_TSICONTB_TSISEL2 0X04 #define DA9052_TSICONTB_TSISEL1 0X02 @@ -705,8 +708,14 @@ /* TSI CO-ORDINATE LSB RESULT REGISTER BITS */ #define DA9052_TSILSB_PENDOWN 0X40 #define DA9052_TSILSB_TSIZL 0X30 +#define DA9052_TSILSB_TSIZL_SHIFT 4 +#define DA9052_TSILSB_TSIZL_BITS 2 #define DA9052_TSILSB_TSIYL 0X0C +#define DA9052_TSILSB_TSIYL_SHIFT 2 +#define DA9052_TSILSB_TSIYL_BITS 2 #define DA9052_TSILSB_TSIXL 0X03 +#define DA9052_TSILSB_TSIXL_SHIFT 0 +#define DA9052_TSILSB_TSIXL_BITS 2 /* TSI Z MEASUREMENT MSB RESULT REGISTER BIT */ #define DA9052_TSIZMSB_TSIZM 0XFF -- cgit From a9467d954226f1a513cfe789a3a39d8fc73b5d16 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:15 +0800 Subject: iommu/mediatek: Move MTK_M4U_TO_LARB/PORT into mtk_iommu.c The definition of MTK_M4U_TO_LARB and MTK_M4U_TO_PORT are shared by all the gen2 M4U HWs. Thus, Move them out from mt8173-larb-port.h, and put them into the c file. Suggested-by: Honghui Zhang Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- include/dt-bindings/memory/mt8173-larb-port.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/memory/mt8173-larb-port.h b/include/dt-bindings/memory/mt8173-larb-port.h index 5fef5d1f8f82..111b4b0ec85a 100644 --- a/include/dt-bindings/memory/mt8173-larb-port.h +++ b/include/dt-bindings/memory/mt8173-larb-port.h @@ -15,10 +15,6 @@ #define __DTS_IOMMU_PORT_MT8173_H #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port)) -/* Local arbiter ID */ -#define MTK_M4U_TO_LARB(id) (((id) >> 5) & 0x7) -/* PortID within the local arbiter */ -#define MTK_M4U_TO_PORT(id) ((id) & 0x1f) #define M4U_LARB0_ID 0 #define M4U_LARB1_ID 1 -- cgit From e6dec92308628cff5f1f8bd1bcdf87581c9dc676 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 21 Aug 2017 19:00:16 +0800 Subject: iommu/mediatek: Add mt2712 IOMMU support The M4U IP blocks in mt2712 is MTK's generation2 M4U which use the ARM Short-descriptor like mt8173, and most of the HW registers are the same. The difference is that there are 2 M4U HWs in mt2712 while there's only one in mt8173. The purpose of 2 M4U HWs is for balance the bandwidth. Normally if there are 2 M4U HWs, there should be 2 iommu domains, each M4U has a iommu domain. Signed-off-by: Yong Wu Signed-off-by: Joerg Roedel --- include/soc/mediatek/smi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h index 8893c5eacd07..5201e9022c86 100644 --- a/include/soc/mediatek/smi.h +++ b/include/soc/mediatek/smi.h @@ -19,7 +19,7 @@ #ifdef CONFIG_MTK_SMI -#define MTK_LARB_NR_MAX 8 +#define MTK_LARB_NR_MAX 16 #define MTK_SMI_MMU_EN(port) BIT(port) -- cgit From 18c90df9f2c00cb35ab8ba747aa0f742ee6bbf6a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 22 Aug 2017 13:46:59 +0200 Subject: mlx5: Replace PCI pool old API The PCI pool API is deprecated. This commit replaces the PCI pool old API by the appropriate function with the DMA pool API. Signed-off-by: Romain Perier Reviewed-by: Peter Senna Tschudin Acked-by: Doug Ledford Tested-by: Doug Ledford Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 099fe311f272..db40bc4055c7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -299,7 +299,7 @@ struct mlx5_cmd { struct semaphore pages_sem; int mode; struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; - struct pci_pool *pool; + struct dma_pool *pool; struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; -- cgit From c5cff8561d2d0006e972bd114afd51f082fee77c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 21 Aug 2017 09:47:10 -0700 Subject: ipv6: add rcu grace period before freeing fib6_node We currently keep rt->rt6i_node pointing to the fib6_node for the route. And some functions make use of this pointer to dereference the fib6_node from rt structure, e.g. rt6_check(). However, as there is neither refcount nor rcu taken when dereferencing rt->rt6i_node, it could potentially cause crashes as rt->rt6i_node could be set to NULL by other CPUs when doing a route deletion. This patch introduces an rcu grace period before freeing fib6_node and makes sure the functions that dereference it takes rcu_read_lock(). Note: there is no "Fixes" tag because this bug was there in a very early stage. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1a88008cc6f5..e9c59db92942 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -70,6 +70,7 @@ struct fib6_node { __u16 fn_flags; int fn_sernum; struct rt6_info *rr_ptr; + struct rcu_head rcu; }; #ifndef CONFIG_IPV6_SUBTREES @@ -167,13 +168,40 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } +/* Function to safely get fn->sernum for passed in rt + * and store result in passed in cookie. + * Return true if we can get cookie safely + * Return false if not + */ +static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, + u32 *cookie) +{ + struct fib6_node *fn; + bool status = false; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + + if (fn) { + *cookie = fn->fn_sernum; + status = true; + } + + rcu_read_unlock(); + return status; +} + static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + u32 cookie = 0; + if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); - return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + rt6_get_cookie_safe(rt, &cookie); + + return cookie; } static inline void ip6_rt_put(struct rt6_info *rt) -- cgit From 16570d3da0938e0c46c31e5f97c9c8452025d2e7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 4 Aug 2017 13:52:20 -0700 Subject: IB/hfi1: Remove pmtu from the QP structure The pmtu field doens't have be stored in the QP structure as it can easily be calculated when needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 07e2fffa6de6..8fbafb0ce674 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -277,7 +277,6 @@ struct rvt_qp { unsigned long timeout_jiffies; /* computed from timeout */ - enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; -- cgit From 13c19222889daf91da36b7fb63b5d5d9ce89b377 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:51 -0700 Subject: IB/rdmavt, hfi1, qib: Modify check_ah() to account for extended LIDs rvt_check_ah() delegates lid verification to underlying driver. Underlying driver uses different conditions to check for dlid depending on whether the device supports extended LIDs Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9b5e642cf550..8d3ad4ecbea1 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -48,10 +48,21 @@ #ifndef OPA_ADDR_H #define OPA_ADDR_H +#include + #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) +/** + * 0xF8 - 4 bits of multicast range and 1 bit for collective range + * Example: For 24 bit LID space, + * Multicast range: 0xF00000 to 0xF7FFFF + * Collective range: 0xF80000 to 0xFFFFFE + */ +#define OPA_MCAST_NR 0x4 /* Number of top bits set */ +#define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ + /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that @@ -95,4 +106,11 @@ static inline bool opa_is_extended_lid(u32 dlid, u32 slid) else return false; } + +/* Get multicast lid base */ +static inline u32 opa_get_mcast_base(u32 nr_top_bits) +{ + return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); +} + #endif /* OPA_ADDR_H */ -- cgit From 72c07e2b671eda1cf3e8ebabc664f542f673b997 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:58 -0700 Subject: IB/hfi1: Add support to receive 16B bypass packets We introduce a struct hfi1_16b_header to support 16B headers. 16B bypass packets are received by the driver and processed similar to 9B packets. Add basic support to handle 16B packets. Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_vnic.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index 39d6890616a6..0c07a70bd7f6 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -54,9 +54,6 @@ #include -/* VNIC uses 16B header format */ -#define OPA_VNIC_L2_TYPE 0x2 - /* 16 header bytes + 2 reserved bytes */ #define OPA_VNIC_L2_HDR_LEN (16 + 2) -- cgit From d98bb7f7e6fa29d45008370084d5cabac7ac69ed Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:16 -0700 Subject: IB/hfi1: Determine 9B/16B L2 header type based on Address handle When address handle attributes are initialized, the LIDs are transformed to be in the 32 bit LID space. When constructing the header, hfi1 driver will look at the LID to determine the packet header to be created. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 15 +++++++++++++++ include/rdma/opa_addr.h | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 70a183179224..8f263930c56f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -864,6 +864,7 @@ struct roce_ah_attr { struct opa_ah_attr { u32 dlid; u8 src_path_bits; + bool make_grd; }; struct rdma_ah_attr { @@ -3625,6 +3626,20 @@ static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr) return 0; } +static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr, + bool make_grd) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + attr->opa.make_grd = make_grd; +} + +static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + return attr->opa.make_grd; + return false; +} + static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) { attr->port_num = port_num; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 8d3ad4ecbea1..9ae126fb8648 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -71,7 +71,7 @@ * * @gid: The Global identifier */ -static inline bool ib_is_opa_gid(union ib_gid *gid) +static inline bool ib_is_opa_gid(const union ib_gid *gid) { return ((be64_to_cpu(gid->global.interface_id) >> 40) == OPA_SPECIAL_OUI); @@ -84,7 +84,7 @@ static inline bool ib_is_opa_gid(union ib_gid *gid) * * @gid: The Global identifier */ -static inline u32 opa_get_lid_from_gid(union ib_gid *gid) +static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } -- cgit From 88733e3b845024cb2324a68469a4a25fdd9c0a6c Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:23 -0700 Subject: IB/hfi1: Add 16B UD support Add 16B bypass packet support for UD traffic types. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/opa_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9ae126fb8648..e6e90f18e6d5 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -54,6 +54,7 @@ #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) +#define OPA_GID_INDEX 0x1 /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range * Example: For 24 bit LID space, -- cgit From 51e658f5dd362cc8666f3f5ec1986660e3e51047 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Fri, 4 Aug 2017 13:54:35 -0700 Subject: IB/rdmavt, hfi1, qib: Enhance rdmavt and hfi1 to use 32 bit lids Increase lid used in hfi1 driver to 32 bits. qib continues to use 16 bit lids. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fdfac0fd2f82..1d94f3c264ba 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -91,7 +91,7 @@ struct rvt_ibport { __be16 pma_counter_select[5]; u16 pma_tag; u16 mkey_lease_period; - u16 sm_lid; + u32 sm_lid; u8 sm_sl; u8 mkeyprot; u8 subnet_timeout; -- cgit From ec0d8b8a63ee760bca1bccc6769d6210e05ded29 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugame Date: Sun, 13 Aug 2017 08:08:46 -0700 Subject: IB/hfi1: Stricter bounds checking of MAD trap index The macro size is valid. This change makes it less ambiguous. Bounds check trap type for better security. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdma_vt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1d94f3c264ba..1ba84a78f1c5 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -64,7 +64,7 @@ #define RVT_MAX_PKEY_VALUES 16 #define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ -#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/ #define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ struct trap_list { -- cgit From e3bf14bdc17a8e917f337760cc7cacf3232d7dbc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Aug 2017 14:57:39 -0600 Subject: rdma: Autoload netlink client modules If a message comes in and we do not have the client in the table, then try to load the module supplying that client using MODULE_ALIAS to find it. This duplicates the scheme seen in other netlink muxes (eg nfnetlink). Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/rdma_netlink.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index e25bf1988846..2d878596b1e0 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -17,6 +17,18 @@ enum rdma_nl_flags { RDMA_NL_ADMIN_PERM = 1 << 0, }; +/* Define this module as providing netlink services for NETLINK_RDMA, with + * index _index. Since the client indexes were setup in a uapi header as an + * enum and we do no want to change that, the user must supply the expanded + * constant as well and the compiler checks they are the same. + */ +#define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ + static inline void __chk_##_index(void) \ + { \ + BUILD_BUG_ON(_index != _val); \ + } \ + MODULE_ALIAS("rdma-netlink-subsys-" __stringify(_val)) + /** * Register client in RDMA netlink. * @index: Index of the added client -- cgit From 111993692741a7044e6c01b428cecf1071de3d0b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 21 Aug 2017 23:33:49 -0700 Subject: tcp: Remove the unused parameter for tcp_try_fastopen. Signed-off-by: Tonghao Zhang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index afdab3781425..a995004ae946 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1533,8 +1533,7 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct dst_entry *dst); + struct tcp_fastopen_cookie *foc); void tcp_fastopen_init_key_once(bool publish); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); -- cgit From 84e54fe0a5eaed696dee4019c396f8396f5a908b Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 22 Aug 2017 09:40:28 -0700 Subject: gre: introduce native tunnel support for ERSPAN The patch adds ERSPAN type II tunnel support. The implementation is based on the draft at [1]. One of the purposes is for Linux box to be able to receive ERSPAN monitoring traffic sent from the Cisco switch, by creating a ERSPAN tunnel device. In addition, the patch also adds ERSPAN TX, so Linux virtual switch can redirect monitored traffic to the ERSPAN tunnel device. The traffic will be encapsulated into ERSPAN and sent out. The implementation reuses tunnel key as ERSPAN session ID, and field 'erspan' as ERSPAN Index fields: ./ip link add dev ers11 type erspan seq key 100 erspan 123 \ local 172.16.1.200 remote 172.16.1.100 To use the above device as ERSPAN receiver, configure Nexus 5000 switch as below: monitor session 100 type erspan-source erspan-id 123 vrf default destination ip 172.16.1.200 source interface Ethernet1/11 both source interface Ethernet1/12 both no shut monitor erspan origin ip-address 172.16.1.100 global [1] https://tools.ietf.org/html/draft-foschiano-erspan-01 [2] iproute2 patch: http://marc.info/?l=linux-netdev&m=150306086924951&w=2 [3] test script: http://marc.info/?l=linux-netdev&m=150231021807304&w=2 Signed-off-by: William Tu Signed-off-by: Meenakshi Vohra Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- include/net/erspan.h | 61 ++++++++++++++++++++++++++++++++++++++++++ include/net/ip_tunnels.h | 3 +++ include/uapi/linux/if_ether.h | 1 + include/uapi/linux/if_tunnel.h | 1 + 4 files changed, 66 insertions(+) create mode 100644 include/net/erspan.h (limited to 'include') diff --git a/include/net/erspan.h b/include/net/erspan.h new file mode 100644 index 000000000000..ca94fc86865e --- /dev/null +++ b/include/net/erspan.h @@ -0,0 +1,61 @@ +#ifndef __LINUX_ERSPAN_H +#define __LINUX_ERSPAN_H + +/* + * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number (increments per packet per session) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Note that in the above GRE header [RFC1701] out of the C, R, K, S, + * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The + * other fields are set to zero, so only a sequence number follows. + * + * ERSPAN Type II header (8 octets [42:49]) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver | VLAN | COS | En|T| Session ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB + */ + +#define ERSPAN_VERSION 0x1 + +#define VER_MASK 0xf000 +#define VLAN_MASK 0x0fff +#define COS_MASK 0xe000 +#define EN_MASK 0x1800 +#define T_MASK 0x0400 +#define ID_MASK 0x03ff +#define INDEX_MASK 0xfffff + +enum erspan_encap_type { + ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ + ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ + ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ +}; + +struct erspan_metadata { + __be32 index; /* type II */ +}; + +struct erspanhdr { + __be16 ver_vlan; +#define VER_OFFSET 12 + __be16 session_id; +#define COS_OFFSET 13 +#define EN_OFFSET 11 +#define T_OFFSET 10 + struct erspan_metadata md; +}; + +#endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 520809912f03..625c29329372 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -115,6 +115,9 @@ struct ip_tunnel { u32 o_seqno; /* The last output seqno */ int tun_hlen; /* Precalculated header length */ + /* This field used only by ERSPAN */ + u32 index; /* ERSPAN type II index */ + struct dst_cache dst_cache; struct ip_tunnel_parm parms; diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 5bc9bfd816b7..efeb1190c2ca 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -66,6 +66,7 @@ #define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */ #define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */ +#define ETH_P_ERSPAN 0x88BE /* ERSPAN type II */ #define ETH_P_IPX 0x8137 /* IPX over DIX */ #define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ #define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 6792d1967d31..2e520883c054 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -134,6 +134,7 @@ enum { IFLA_GRE_COLLECT_METADATA, IFLA_GRE_IGNORE_DF, IFLA_GRE_FWMARK, + IFLA_GRE_ERSPAN_INDEX, __IFLA_GRE_MAX, }; -- cgit From 5cdcf4c6a638591ec0e98c57404a19e7f9997567 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 28 Jul 2017 13:56:06 +0200 Subject: ANDROID: binder: add padding to binder_fd_array_object. binder_fd_array_object starts with a 4-byte header, followed by a few fields that are 8 bytes when ANDROID_BINDER_IPC_32BIT=N. This can cause alignment issues in a 64-bit kernel with a 32-bit userspace, as on x86_32 an 8-byte primitive may be aligned to a 4-byte address. Pad with a __u32 to fix this. Signed-off-by: Martijn Coenen Cc: stable # 4.11+ Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 51f891fb1b18..7668b5791c91 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -132,6 +132,7 @@ enum { /* struct binder_fd_array_object - object describing an array of fds in a buffer * @hdr: common header structure + * @pad: padding to ensure correct alignment * @num_fds: number of file descriptors in the buffer * @parent: index in offset array to buffer holding the fd array * @parent_offset: start offset of fd array in the buffer @@ -152,6 +153,7 @@ enum { */ struct binder_fd_array_object { struct binder_object_header hdr; + __u32 pad; binder_size_t num_fds; binder_size_t parent; binder_size_t parent_offset; -- cgit From 3946d18765be0b0fe278bea4f87a54cf13858019 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 14 Aug 2017 21:59:55 -0700 Subject: gpio: add gpio_add_lookup_tables() to add several tables at once When converting legacy board to use gpiod API() there might be several lookup tables in board file, let's provide a way to register them all at once. Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Dmitry Torokhov Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index 6e76b16fcade..ba4ccfd900f9 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -60,11 +60,14 @@ struct gpiod_lookup_table { #ifdef CONFIG_GPIOLIB void gpiod_add_lookup_table(struct gpiod_lookup_table *table); +void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); #else static inline void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} static inline +void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) {} +static inline void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} #endif -- cgit From 0edc23ea2692fe75e941ec00867e661eb15f67fa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:01:52 +0000 Subject: irqchip/gic-v3: Add VLPI/DirectLPI discovery Add helper functions that probe for VLPI and DirectLPI properties. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6a1f87ff94e2..20a553423ac7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -204,6 +204,7 @@ #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DirectLPIS (1U << 3) #define GICR_TYPER_LAST (1U << 4) #define GIC_V3_REDIST_SIZE 0x20000 @@ -487,6 +488,8 @@ struct rdists { struct page *prop_page; int id_bits; u64 flags; + bool has_vlpis; + bool has_direct_lpi; }; struct irq_domain; -- cgit From 3dfa576bfb453482314b596931a59a4951428058 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 17:25:54 +0000 Subject: irqchip/gic-v3-its: Add probing for VLPI properties Add the probing code for the ITS VLPI support. This includes configuring the ITS number if not supporting the single VMOVP command feature. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 20a553423ac7..af8c55105fc2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -235,15 +235,20 @@ #define GITS_TRANSLATER 0x10040 #define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) #define GITS_CTLR_QUIESCENT (1U << 31) #define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HWCOLLCNT_SHIFT 24 +#define GITS_TYPER_VMOVP (1ULL << 37) #define GITS_IIDR_REV_SHIFT 12 #define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) -- cgit From 5158bd5597e4c0939db3ecefbcbf492c425e611c Mon Sep 17 00:00:00 2001 From: Jean-Louis Thekekara Date: Thu, 29 Jun 2017 19:08:30 +0200 Subject: mtd: nand: remove hard-coded NAND ids length This commit removes hard-coded '8' used for looping into struct nand_chip.id.data array. NAND_MAX_ID_LEN has been introduced by Artem Bityutskiy in 53552d22bfe1f for defining ids length in nand_flash_ids[] list. This commit unifies ids length in nand base driver. Signed-off-by: Jean-Louis Thekekara Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8a6522e82f03..297684013977 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -439,14 +439,16 @@ struct nand_jedec_params { __le16 crc; } __packed; +/* The maximum expected count of bytes in the NAND ID sequence */ +#define NAND_MAX_ID_LEN 8 + /** * struct nand_id - NAND id structure - * @data: buffer containing the id bytes. Currently 8 bytes large, but can - * be extended if required. + * @data: buffer containing the id bytes. * @len: ID length. */ struct nand_id { - u8 data[8]; + u8 data[NAND_MAX_ID_LEN]; int len; }; @@ -1018,8 +1020,6 @@ static inline void *nand_get_manufacturer_data(struct nand_chip *chip) #define NAND_MFR_ATO 0x9b #define NAND_MFR_WINBOND 0xef -/* The maximum expected count of bytes in the NAND ID sequence */ -#define NAND_MAX_ID_LEN 8 /* * A helper for defining older NAND chips where the second ID byte fully -- cgit From e59ad6ff83b444517067046bd837595a1ab84900 Mon Sep 17 00:00:00 2001 From: Andrea Adami Date: Mon, 14 Aug 2017 22:48:33 +0200 Subject: mtd: nand: sharpsl: Add partition parsers platform data With the introduction of sharpslpart partition parser we can now read the offsets from NAND: we specify the list of the parsers as platform data, with cmdlinepart and ofpart parsers first allowing to override the part. table written in NAND. This is done in the board files using this driver. Thus, we need to extend sharpsl_nand_platform_data to consider the partition parsers. Signed-off-by: Andrea Adami Signed-off-by: Boris Brezillon --- include/linux/mtd/sharpsl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h index 65e91d0fa981..6381a7d6c289 100644 --- a/include/linux/mtd/sharpsl.h +++ b/include/linux/mtd/sharpsl.h @@ -17,4 +17,5 @@ struct sharpsl_nand_platform_data { const struct mtd_ooblayout_ops *ecc_layout; struct mtd_partition *partitions; unsigned int nr_partitions; + const char *const *part_parsers; }; -- cgit From 827dba9d626325a65727c4074913b2d3c7e7ea4d Mon Sep 17 00:00:00 2001 From: Andrea Adami Date: Mon, 14 Aug 2017 22:48:34 +0200 Subject: mfd: tmio: Add partition parsers platform data With the introduction of sharpslpart partition parser we can now read the offsets from NAND: we specify the list of the parsers as platform data, with cmdlinepart and ofpart parsers first allowing to override the part. table written in NAND. This is done in the board files using this driver. Thus, we need to extend tmio_nand_data to consider the partition parsers. Signed-off-by: Andrea Adami Acked-by: Lee Jones Acked-by: Wolfram Sang Signed-off-by: Boris Brezillon --- include/linux/mfd/tmio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 26e8f8c0a6db..357b6cfdf34a 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -139,6 +139,7 @@ struct tmio_nand_data { struct nand_bbt_descr *badblock_pattern; struct mtd_partition *partition; unsigned int num_partitions; + const char *const *part_parsers; }; #define FBIO_TMIO_ACC_WRITE 0x7C639300 -- cgit From c2ee070fb00365d7841f6661dcdc7fbe6620bdf8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:31 +0200 Subject: block: cache the partition index in struct block_device Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..706dd3a972d2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -427,6 +427,7 @@ struct block_device { #endif struct block_device * bd_contains; unsigned bd_block_size; + u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; -- cgit From 74d46992e0d9dee7f1f376de0d56d31614c8a17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:32 +0200 Subject: block: replace bi_bdev with a gendisk pointer and partitions index This way we don't need a block_device structure to submit I/O. The block_device has different life time rules from the gendisk and request_queue and is usually only available when the block device node is open. Other callers need to explicitly create one (e.g. the lightnvm passthrough code, or the new nvme multipathing code). For the actual I/O path all that we need is the gendisk, which exists once per block device. But given that the block layer also does partition remapping we additionally need a partition index, which is used for said remapping in generic_make_request. Note that all the block drivers generally want request_queue or sometimes the gendisk, so this removes a layer of indirection all over the stack. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 18 ++++++++++++++++++ include/linux/blk_types.h | 3 ++- include/trace/events/bcache.h | 6 +++--- include/trace/events/block.h | 16 ++++++++-------- include/trace/events/f2fs.h | 2 +- 5 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9276788a9b24..a8fe7935332f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -494,6 +494,24 @@ extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); +#define bio_set_dev(bio, bdev) \ +do { \ + (bio)->bi_disk = (bdev)->bd_disk; \ + (bio)->bi_partno = (bdev)->bd_partno; \ +} while (0) + +#define bio_copy_dev(dst, src) \ +do { \ + (dst)->bi_disk = (src)->bi_disk; \ + (dst)->bi_partno = (src)->bi_partno; \ +} while (0) + +#define bio_dev(bio) \ + disk_devt((bio)->bi_disk) + +#define bio_devname(bio, buf) \ + __bdevname(bio_dev(bio), (buf)) + #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_current(struct bio *bio); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d2eb87c84d82..a2d2aa709cef 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -48,7 +48,8 @@ struct blk_issue_stat { */ struct bio { struct bio *bi_next; /* request queue link */ - struct block_device *bi_bdev; + struct gendisk *bi_disk; + u8 bi_partno; blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index df3e9ae5ad8d..daf749138ff8 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(bcache_request, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->orig_major = d->disk->major; __entry->orig_minor = d->disk->first_minor; __entry->sector = bio->bi_iter.bi_sector; @@ -98,7 +98,7 @@ DECLARE_EVENT_CLASS(bcache_bio, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -133,7 +133,7 @@ TRACE_EVENT(bcache_read, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d0dbe60d8a6d..f815aaaef755 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -236,8 +236,7 @@ TRACE_EVENT(block_bio_bounce, ), TP_fast_assign( - __entry->dev = bio->bi_bdev ? - bio->bi_bdev->bd_dev : 0; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -274,7 +273,7 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = error; @@ -302,7 +301,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -369,7 +368,7 @@ TRACE_EVENT(block_bio_queue, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -397,7 +396,8 @@ DECLARE_EVENT_CLASS(block_get_rq, ), TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->dev = bio ? bio_dev(bio) : 0; + __entry->dev = bio_dev(bio); __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, @@ -532,7 +532,7 @@ TRACE_EVENT(block_split, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -573,7 +573,7 @@ TRACE_EVENT(block_bio_remap, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 6f77a2755abb..bc4dd7837e4c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->target = bio->bi_bdev->bd_dev; + __entry->target = bio_dev(bio); __entry->op = bio_op(bio); __entry->op_flags = bio->bi_opf; __entry->type = type; -- cgit From 1e6ec9ea89d30739b9447c1860fcb07fc29f3aef Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 23 Aug 2017 14:54:59 -0700 Subject: Revert "loop: support 4k physical blocksize" There's some stuff still up in the air, let's not get stuck with a subpar ABI. I'll follow up with something better for 4.14. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index a3960f98679c..c8125ec1f4f2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -22,7 +22,6 @@ enum { LO_FLAGS_AUTOCLEAR = 4, LO_FLAGS_PARTSCAN = 8, LO_FLAGS_DIRECT_IO = 16, - LO_FLAGS_BLOCKSIZE = 32, }; #include /* for __kernel_old_dev_t */ @@ -60,8 +59,6 @@ struct loop_info64 { __u64 lo_init[2]; }; -#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0] - /* * Loop filter types */ -- cgit From 69a6beab085264cdcdb9e3160b1a2ae61b2efde1 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 14 Aug 2017 12:26:34 +0200 Subject: clk: msm8996-gcc: add missing smmu clks This patch adds missing LPASS smmu clks which are required by the audio driver. Signed-off-by: Srinivas Kandagatla Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/qcom,gcc-msm8996.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,gcc-msm8996.h b/include/dt-bindings/clock/qcom,gcc-msm8996.h index 1f5c42254798..75b07cf5eed0 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8996.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8996.h @@ -233,6 +233,8 @@ #define GCC_PCIE_CLKREF_CLK 216 #define GCC_RX2_USB2_CLKREF_CLK 217 #define GCC_RX1_USB2_CLKREF_CLK 218 +#define GCC_HLOS1_VOTE_LPASS_CORE_SMMU_CLK 219 +#define GCC_HLOS1_VOTE_LPASS_ADSP_SMMU_CLK 220 #define GCC_SYSTEM_NOC_BCR 0 #define GCC_CONFIG_NOC_BCR 1 -- cgit From 143c97cc652949893c8056c679012f0aeccb80e5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Aug 2017 18:16:11 -0700 Subject: Revert "pty: fix the cached path of the pty slave file descriptor in the master" This reverts commit c8c03f1858331e85d397bacccd34ef409aae993c. It turns out that while fixing the ptmx file descriptor to have the correct 'struct path' to the associated slave pty is a really good thing, it breaks some user space tools for a very annoying reason. The problem is that /dev/ptmx and its associated slave pty (/dev/pts/X) are on different mounts. That was what caused us to have the wrong path in the first place (we would mix up the vfsmount of the 'ptmx' node, with the dentry of the pty slave node), but it also means that now while we use the right vfsmount, having the pty master open also keeps the pts mount busy. And it turn sout that that makes 'pbuilder' very unhappy, as noted by Stefan Lippers-Hollmann: "This patch introduces a regression for me when using pbuilder 0.228.7[2] (a helper to build Debian packages in a chroot and to create and update its chroots) when trying to umount /dev/ptmx (inside the chroot) on Debian/ unstable (full log and pbuilder configuration file[3] attached). [...] Setting up build-essential (12.3) ... Processing triggers for libc-bin (2.24-15) ... I: unmounting dev/ptmx filesystem W: Could not unmount dev/ptmx: umount: /var/cache/pbuilder/build/1340/dev/ptmx: target is busy (In some cases useful info about processes that use the device is found by lsof(8) or fuser(1).)" apparently pbuilder tries to unmount the /dev/pts filesystem while still holding at least one master node open, which is arguably not very nice, but we don't break user space even when fixing other bugs. So this commit has to be reverted. I'll try to figure out a way to avoid caching the path to the slave pty in the master pty. The only thing that actually wants that slave pty path is the "TIOCGPTPEER" ioctl, and I think we could just recreate the path at that time. Reported-by: Stefan Lippers-Hollmann Cc: Eric W Biederman Cc: Christian Brauner Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 7883e901f65c..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); +struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit From c4335fdd38227788178953c101b77180504d7ea0 Mon Sep 17 00:00:00 2001 From: gengdongjiu Date: Thu, 17 Aug 2017 20:07:18 +0800 Subject: ACPI: APEI: fix the wrong iteration of generic error status block The revision 0x300 generic error data entry is different from the old version, but currently iterating through the GHES estatus blocks does not take into account this difference. This will lead to failure to get the right data entry if GHES has revision 0x300 error data entry. Update the GHES estatus iteration macro to properly increment using acpi_hest_get_next(), and correct the iteration termination condition because the status block data length only includes error data length. Convert the CPER estatus checking and printing iteration logic to use same macro. Signed-off-by: Dongjiu Geng Tested-by: Tyler Baicar Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- include/acpi/ghes.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 9f26e01186ae..9061c5c743b3 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -113,6 +113,11 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) return (void *)(gdata) + acpi_hest_get_record_size(gdata); } +#define apei_estatus_for_each_section(estatus, section) \ + for (section = (struct acpi_hest_generic_data *)(estatus + 1); \ + (void *)section - (void *)(estatus + 1) < estatus->data_length; \ + section = acpi_hest_get_next(section)) + int ghes_notify_sea(void); #endif /* GHES_H */ -- cgit From 98aaa913b4ed250324429f0a9e6d5f77a3b5276c Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Tue, 22 Aug 2017 17:08:48 -0400 Subject: tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg When SOF_TIMESTAMPING_RX_SOFTWARE is enabled for tcp sockets, return the timestamp corresponding to the highest sequence number data returned. Previously the skb->tstamp is overwritten when a TCP packet is placed in the out of order queue. While the packet is in the ooo queue, save the timestamp in the TCB_SKB_CB. This space is shared with the gso_* options which are only used on the tx path, and a previously unused 4 byte hole. When skbs are coalesced either in the sk_receive_queue or the out_of_order_queue always choose the timestamp of the appended skb to maintain the invariant of returning the timestamp of the last byte in the recvmsg buffer. Signed-off-by: Mike Maloney Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/tcp.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a995004ae946..c614ff135b66 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -774,6 +774,12 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; + + /* Used to stash the receive timestamp while this skb is in the + * out of order queue, as skb->tstamp is overwritten by the + * rbnode. + */ + ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ @@ -790,7 +796,8 @@ struct tcp_skb_cb { __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ eor:1, /* Is skb MSG_EOR marked? */ - unused:6; + has_rxtstamp:1, /* SKB has a RX timestamp */ + unused:5; __u32 ack_seq; /* Sequence number ACK'd */ union { struct { -- cgit From cd0a137acbb66208368353723f5f1480995cf1c4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Aug 2017 15:12:14 -0700 Subject: net: core: Specify skb_pad()/skb_put_padto() SKB freeing Rename skb_pad() into __skb_pad() and make it take a third argument: free_on_error which controls whether kfree_skb() should be called or not, skb_pad() directly makes use of it and passes true to preserve its existing behavior. Do exactly the same thing with __skb_put_padto() and skb_put_padto(). Suggested-by: David Miller Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- include/linux/skbuff.h | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dbe29b6c9bd6..d67a8182e5eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -973,7 +973,23 @@ int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); -int skb_pad(struct sk_buff *skb, int pad); +int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error); + +/** + * skb_pad - zero pad the tail of an skb + * @skb: buffer to pad + * @pad: space to pad + * + * Ensure that a buffer is followed by a padding area that is zero + * filled. Used by network drivers which may DMA or transfer data + * beyond the buffer end onto the wire. + * + * May return error in out of memory cases. The skb is freed on error. + */ +static inline int skb_pad(struct sk_buff *skb, int pad) +{ + return __skb_pad(skb, pad, true); +} #define dev_kfree_skb(a) consume_skb(a) int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, @@ -2825,25 +2841,42 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * skb_put_padto - increase size and pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length + * @free_on_error: free buffer on error * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on - * success. The skb is freed on error. + * success. The skb is freed on error if @free_on_error is true. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, + bool free_on_error) { unsigned int size = skb->len; if (unlikely(size < len)) { len -= size; - if (skb_pad(skb, len)) + if (__skb_pad(skb, len, free_on_error)) return -ENOMEM; __skb_put(skb, len); } return 0; } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + return __skb_put_padto(skb, len, true); +} + static inline int skb_add_data(struct sk_buff *skb, struct iov_iter *from, int copy) { -- cgit From f9cbe9a556afca9e82df9aebe4412d93769566b5 Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Wed, 23 Aug 2017 09:46:54 +0200 Subject: net: define the TSO header size in net/tso.h The TSO header size was defined in many drivers. Factorize the code and define its size in net/tso.h. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/net/tso.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tso.h b/include/net/tso.h index b7be852bfe9d..9a56c39e6d0a 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -3,6 +3,8 @@ #include +#define TSO_HEADER_SIZE 128 + struct tso_t { int next_frag_idx; void *data; -- cgit From e457d86ada27cbd2f46ded75d4b4bc06e26d0e2e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 23 Aug 2017 10:08:19 +0200 Subject: net: sched: add couple of goto_chain helpers Add helpers to find out if a gact instance is goto_chain termination action and to get chain index. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index d576374c4d6f..41afe1ce7b16 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -15,7 +15,8 @@ struct tcf_gact { }; #define to_gact(a) ((struct tcf_gact *)a) -static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) +static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, + bool is_ext) { #ifdef CONFIG_NET_CLS_ACT struct tcf_gact *gact; @@ -24,7 +25,8 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) return false; gact = to_gact(a); - if (gact->tcf_action == act) + if ((!is_ext && gact->tcf_action == act) || + (is_ext && TC_ACT_EXT_CMP(gact->tcf_action, act))) return true; #endif @@ -33,12 +35,22 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act) static inline bool is_tcf_gact_shot(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_SHOT); + return __is_tcf_gact_act(a, TC_ACT_SHOT, false); } static inline bool is_tcf_gact_trap(const struct tc_action *a) { - return __is_tcf_gact_act(a, TC_ACT_TRAP); + return __is_tcf_gact_act(a, TC_ACT_TRAP, false); +} + +static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_GOTO_CHAIN, true); +} + +static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) +{ + return a->goto_chain->index; } #endif /* __NET_TC_GACT_H */ -- cgit From 1b688a19a92223cf2d1892c9d05d64dc397b33e3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 23 Aug 2017 15:10:50 +0100 Subject: bpf/verifier: remove varlen_map_value_access flag The optimisation it does is broken when the 'new' register value has a variable offset and the 'old' was constant. I broke it with my pointer types unification (see Fixes tag below), before which the 'new' value would have type PTR_TO_MAP_VALUE_ADJ and would thus not compare equal; other changes in that patch mean that its original behaviour (ignore min/max values) cannot be restored. Tests on a sample set of cilium programs show no change in count of processed instructions. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 91d07efed2ba..d8f131a36fd0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -125,7 +125,6 @@ struct bpf_verifier_env { u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool seen_direct_write; - bool varlen_map_value_access; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; -- cgit From 8e9cd9ce90d48369b2c5ddd79fe3d4a4cb1ccb56 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 23 Aug 2017 15:11:21 +0100 Subject: bpf/verifier: document liveness analysis The liveness tracking algorithm is quite subtle; add comments to explain it. Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d8f131a36fd0..b8d200f60a40 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -21,6 +21,19 @@ */ #define BPF_MAX_VAR_SIZ INT_MAX +/* Liveness marks, used for registers and spilled-regs (in stack slots). + * Read marks propagate upwards until they find a write mark; they record that + * "one of this state's descendants read this reg" (and therefore the reg is + * relevant for states_equal() checks). + * Write marks collect downwards and do not propagate; they record that "the + * straight-line code that reached this state (from its parent) wrote this reg" + * (and therefore that reads propagated from this state or its descendants + * should not propagate to its parent). + * A state with a write mark can receive read marks; it just won't propagate + * them to its parent, since the write mark is a property, not of the state, + * but of the link between it and its parent. See mark_reg_read() and + * mark_stack_slot_read() in kernel/bpf/verifier.c. + */ enum bpf_reg_liveness { REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */ REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */ -- cgit From ecda85e70277ef24e44a1f6bc00243cebd19f985 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 16 Aug 2017 19:31:57 +0200 Subject: x86/lguest: Remove lguest support Lguest seems to be rather unused these days. It has seen only patches ensuring it still builds the last two years and its official state is "Odd Fixes". Remove it in order to be able to clean up the paravirt code. Signed-off-by: Juergen Gross Acked-by: Rusty Russell Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: lguest@lists.ozlabs.org Cc: rusty@rustcorp.com.au Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20170816173157.8633-3-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/lguest.h | 73 ---------------------------------------- include/linux/lguest_launcher.h | 44 ------------------------ include/uapi/linux/virtio_ring.h | 4 +-- 3 files changed, 2 insertions(+), 119 deletions(-) delete mode 100644 include/linux/lguest.h delete mode 100644 include/linux/lguest_launcher.h (limited to 'include') diff --git a/include/linux/lguest.h b/include/linux/lguest.h deleted file mode 100644 index 6db19f35f7c5..000000000000 --- a/include/linux/lguest.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Things the lguest guest needs to know. Note: like all lguest interfaces, - * this is subject to wild and random change between versions. - */ -#ifndef _LINUX_LGUEST_H -#define _LINUX_LGUEST_H - -#ifndef __ASSEMBLY__ -#include -#include -#include - -#define LG_CLOCK_MIN_DELTA 100UL -#define LG_CLOCK_MAX_DELTA ULONG_MAX - -/*G:031 - * The second method of communicating with the Host is to via "struct - * lguest_data". Once the Guest's initialization hypercall tells the Host where - * this is, the Guest and Host both publish information in it. -:*/ -struct lguest_data { - /* - * 512 == enabled (same as eflags in normal hardware). The Guest - * changes interrupts so often that a hypercall is too slow. - */ - unsigned int irq_enabled; - /* Fine-grained interrupt disabling by the Guest */ - DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); - - /* - * The Host writes the virtual address of the last page fault here, - * which saves the Guest a hypercall. CR2 is the native register where - * this address would normally be found. - */ - unsigned long cr2; - - /* Wallclock time set by the Host. */ - struct timespec time; - - /* - * Interrupt pending set by the Host. The Guest should do a hypercall - * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). - */ - int irq_pending; - - /* - * Async hypercall ring. Instead of directly making hypercalls, we can - * place them in here for processing the next time the Host wants. - * This batching can be quite efficient. - */ - - /* 0xFF == done (set by Host), 0 == pending (set by Guest). */ - u8 hcall_status[LHCALL_RING_SIZE]; - /* The actual registers for the hypercalls. */ - struct hcall_args hcalls[LHCALL_RING_SIZE]; - -/* Fields initialized by the Host at boot: */ - /* Memory not to try to access */ - unsigned long reserve_mem; - /* KHz for the TSC clock. */ - u32 tsc_khz; - -/* Fields initialized by the Guest at boot: */ - /* Instruction to suppress interrupts even if enabled */ - unsigned long noirq_iret; - /* Address above which page tables are all identical. */ - unsigned long kernel_address; - /* The vector to try to use for system calls (0x40 or 0x80). */ - unsigned int syscall_vec; -}; -extern struct lguest_data lguest_data; -#endif /* __ASSEMBLY__ */ -#endif /* _LINUX_LGUEST_H */ diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h deleted file mode 100644 index acd5b12565cc..000000000000 --- a/include/linux/lguest_launcher.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _LINUX_LGUEST_LAUNCHER -#define _LINUX_LGUEST_LAUNCHER -/* Everything the "lguest" userspace program needs to know. */ -#include - -/*D:010 - * Drivers - * - * The Guest needs devices to do anything useful. Since we don't let it touch - * real devices (think of the damage it could do!) we provide virtual devices. - * We emulate a PCI bus with virtio devices on it; we used to have our own - * lguest bus which was far simpler, but this tests the virtio 1.0 standard. - * - * Virtio devices are also used by kvm, so we can simply reuse their optimized - * device drivers. And one day when everyone uses virtio, my plan will be - * complete. Bwahahahah! - */ - -/* Write command first word is a request. */ -enum lguest_req -{ - LHREQ_INITIALIZE, /* + base, pfnlimit, start */ - LHREQ_GETDMA, /* No longer used */ - LHREQ_IRQ, /* + irq */ - LHREQ_BREAK, /* No longer used */ - LHREQ_EVENTFD, /* No longer used. */ - LHREQ_GETREG, /* + offset within struct pt_regs (then read value). */ - LHREQ_SETREG, /* + offset within struct pt_regs, value. */ - LHREQ_TRAP, /* + trap number to deliver to guest. */ -}; - -/* - * This is what read() of the lguest fd populates. trap == - * LGUEST_TRAP_ENTRY for an LHCALL_NOTIFY (addr is the - * argument), 14 for a page fault in the MMIO region (addr is - * the trap address, insn is the instruction), or 13 for a GPF - * (insn is the instruction). - */ -struct lguest_pending { - __u8 trap; - __u8 insn[7]; - __u32 addr; -}; -#endif /* _LINUX_LGUEST_LAUNCHER */ diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index c07295969b7e..6d5d5faa989b 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -1,7 +1,7 @@ #ifndef _UAPI_LINUX_VIRTIO_RING_H #define _UAPI_LINUX_VIRTIO_RING_H -/* An interface for efficient virtio implementation, currently for use by KVM - * and lguest, but hopefully others soon. Do NOT change this since it will +/* An interface for efficient virtio implementation, currently for use by KVM, + * but hopefully others soon. Do NOT change this since it will * break existing servers and clients. * * This header is BSD licensed so anyone can use the definitions to implement -- cgit From c84f5683f6e9fee78e054431d89121225ccb7464 Mon Sep 17 00:00:00 2001 From: Priit Laes Date: Wed, 23 Aug 2017 20:23:29 +0300 Subject: clk: sunxi-ng: Add sun4i/sun7i CCU driver Introduce a clock controller driver for sun4i A10 and sun7i A20 series SoCs. Signed-off-by: Priit Laes Signed-off-by: Maxime Ripard --- include/dt-bindings/clock/sun4i-a10-ccu.h | 200 ++++++++++++++++++++++++++++++ include/dt-bindings/clock/sun7i-a20-ccu.h | 53 ++++++++ include/dt-bindings/reset/sun4i-a10-ccu.h | 69 +++++++++++ 3 files changed, 322 insertions(+) create mode 100644 include/dt-bindings/clock/sun4i-a10-ccu.h create mode 100644 include/dt-bindings/clock/sun7i-a20-ccu.h create mode 100644 include/dt-bindings/reset/sun4i-a10-ccu.h (limited to 'include') diff --git a/include/dt-bindings/clock/sun4i-a10-ccu.h b/include/dt-bindings/clock/sun4i-a10-ccu.h new file mode 100644 index 000000000000..c5a53f38d654 --- /dev/null +++ b/include/dt-bindings/clock/sun4i-a10-ccu.h @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2017 Priit Laes + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_CLK_SUN4I_A10_H_ +#define _DT_BINDINGS_CLK_SUN4I_A10_H_ + +#define CLK_HOSC 1 +#define CLK_CPU 20 + +/* AHB Gates */ +#define CLK_AHB_OTG 26 +#define CLK_AHB_EHCI0 27 +#define CLK_AHB_OHCI0 28 +#define CLK_AHB_EHCI1 29 +#define CLK_AHB_OHCI1 30 +#define CLK_AHB_SS 31 +#define CLK_AHB_DMA 32 +#define CLK_AHB_BIST 33 +#define CLK_AHB_MMC0 34 +#define CLK_AHB_MMC1 35 +#define CLK_AHB_MMC2 36 +#define CLK_AHB_MMC3 37 +#define CLK_AHB_MS 38 +#define CLK_AHB_NAND 39 +#define CLK_AHB_SDRAM 40 +#define CLK_AHB_ACE 41 +#define CLK_AHB_EMAC 42 +#define CLK_AHB_TS 43 +#define CLK_AHB_SPI0 44 +#define CLK_AHB_SPI1 45 +#define CLK_AHB_SPI2 46 +#define CLK_AHB_SPI3 47 +#define CLK_AHB_PATA 48 +#define CLK_AHB_SATA 49 +#define CLK_AHB_GPS 50 +#define CLK_AHB_HSTIMER 51 +#define CLK_AHB_VE 52 +#define CLK_AHB_TVD 53 +#define CLK_AHB_TVE0 54 +#define CLK_AHB_TVE1 55 +#define CLK_AHB_LCD0 56 +#define CLK_AHB_LCD1 57 +#define CLK_AHB_CSI0 58 +#define CLK_AHB_CSI1 59 +#define CLK_AHB_HDMI0 60 +#define CLK_AHB_HDMI1 61 +#define CLK_AHB_DE_BE0 62 +#define CLK_AHB_DE_BE1 63 +#define CLK_AHB_DE_FE0 64 +#define CLK_AHB_DE_FE1 65 +#define CLK_AHB_GMAC 66 +#define CLK_AHB_MP 67 +#define CLK_AHB_GPU 68 + +/* APB0 Gates */ +#define CLK_APB0_CODEC 69 +#define CLK_APB0_SPDIF 70 +#define CLK_APB0_I2S0 71 +#define CLK_APB0_AC97 72 +#define CLK_APB0_I2S1 73 +#define CLK_APB0_PIO 74 +#define CLK_APB0_IR0 75 +#define CLK_APB0_IR1 76 +#define CLK_APB0_I2S2 77 +#define CLK_APB0_KEYPAD 78 + +/* APB1 Gates */ +#define CLK_APB1_I2C0 79 +#define CLK_APB1_I2C1 80 +#define CLK_APB1_I2C2 81 +#define CLK_APB1_I2C3 82 +#define CLK_APB1_CAN 83 +#define CLK_APB1_SCR 84 +#define CLK_APB1_PS20 85 +#define CLK_APB1_PS21 86 +#define CLK_APB1_I2C4 87 +#define CLK_APB1_UART0 88 +#define CLK_APB1_UART1 89 +#define CLK_APB1_UART2 90 +#define CLK_APB1_UART3 91 +#define CLK_APB1_UART4 92 +#define CLK_APB1_UART5 93 +#define CLK_APB1_UART6 94 +#define CLK_APB1_UART7 95 + +/* IP clocks */ +#define CLK_NAND 96 +#define CLK_MS 97 +#define CLK_MMC0 98 +#define CLK_MMC0_OUTPUT 99 +#define CLK_MMC0_SAMPLE 100 +#define CLK_MMC1 101 +#define CLK_MMC1_OUTPUT 102 +#define CLK_MMC1_SAMPLE 103 +#define CLK_MMC2 104 +#define CLK_MMC2_OUTPUT 105 +#define CLK_MMC2_SAMPLE 106 +#define CLK_MMC3 107 +#define CLK_MMC3_OUTPUT 108 +#define CLK_MMC3_SAMPLE 109 +#define CLK_TS 110 +#define CLK_SS 111 +#define CLK_SPI0 112 +#define CLK_SPI1 113 +#define CLK_SPI2 114 +#define CLK_PATA 115 +#define CLK_IR0 116 +#define CLK_IR1 117 +#define CLK_I2S0 118 +#define CLK_AC97 119 +#define CLK_SPDIF 120 +#define CLK_KEYPAD 121 +#define CLK_SATA 122 +#define CLK_USB_OHCI0 123 +#define CLK_USB_OHCI1 124 +#define CLK_USB_PHY 125 +#define CLK_GPS 126 +#define CLK_SPI3 127 +#define CLK_I2S1 128 +#define CLK_I2S2 129 + +/* DRAM Gates */ +#define CLK_DRAM_VE 130 +#define CLK_DRAM_CSI0 131 +#define CLK_DRAM_CSI1 132 +#define CLK_DRAM_TS 133 +#define CLK_DRAM_TVD 134 +#define CLK_DRAM_TVE0 135 +#define CLK_DRAM_TVE1 136 +#define CLK_DRAM_OUT 137 +#define CLK_DRAM_DE_FE1 138 +#define CLK_DRAM_DE_FE0 139 +#define CLK_DRAM_DE_BE0 140 +#define CLK_DRAM_DE_BE1 141 +#define CLK_DRAM_MP 142 +#define CLK_DRAM_ACE 143 + +/* Display Engine Clocks */ +#define CLK_DE_BE0 144 +#define CLK_DE_BE1 145 +#define CLK_DE_FE0 146 +#define CLK_DE_FE1 147 +#define CLK_DE_MP 148 +#define CLK_TCON0_CH0 149 +#define CLK_TCON1_CH0 150 +#define CLK_CSI_SCLK 151 +#define CLK_TVD_SCLK2 152 +#define CLK_TVD 153 +#define CLK_TCON0_CH1_SCLK2 154 +#define CLK_TCON0_CH1 155 +#define CLK_TCON1_CH1_SCLK2 156 +#define CLK_TCON1_CH1 157 +#define CLK_CSI0 158 +#define CLK_CSI1 159 +#define CLK_CODEC 160 +#define CLK_VE 161 +#define CLK_AVS 162 +#define CLK_ACE 163 +#define CLK_HDMI 164 +#define CLK_GPU 165 + +#endif /* _DT_BINDINGS_CLK_SUN4I_A10_H_ */ diff --git a/include/dt-bindings/clock/sun7i-a20-ccu.h b/include/dt-bindings/clock/sun7i-a20-ccu.h new file mode 100644 index 000000000000..045a5178da0c --- /dev/null +++ b/include/dt-bindings/clock/sun7i-a20-ccu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 Priit Laes + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_CLK_SUN7I_A20_H_ +#define _DT_BINDINGS_CLK_SUN7I_A20_H_ + +#include + +#define CLK_MBUS 166 +#define CLK_HDMI1_SLOW 167 +#define CLK_HDMI1 168 +#define CLK_OUT_A 169 +#define CLK_OUT_B 170 + +#endif /* _DT_BINDINGS_CLK_SUN7I_A20_H_ */ diff --git a/include/dt-bindings/reset/sun4i-a10-ccu.h b/include/dt-bindings/reset/sun4i-a10-ccu.h new file mode 100644 index 000000000000..5f4480bedc8a --- /dev/null +++ b/include/dt-bindings/reset/sun4i-a10-ccu.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2017 Priit Laes + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DT_BINDINGS_RST_SUN4I_A10_H +#define _DT_BINDINGS_RST_SUN4I_A10_H + +#define RST_USB_PHY0 1 +#define RST_USB_PHY1 2 +#define RST_USB_PHY2 3 +#define RST_GPS 4 +#define RST_DE_BE0 5 +#define RST_DE_BE1 6 +#define RST_DE_FE0 7 +#define RST_DE_FE1 8 +#define RST_DE_MP 9 +#define RST_TVE0 10 +#define RST_TCON0 11 +#define RST_TVE1 12 +#define RST_TCON1 13 +#define RST_CSI0 14 +#define RST_CSI1 15 +#define RST_VE 16 +#define RST_ACE 17 +#define RST_LVDS 18 +#define RST_GPU 19 +#define RST_HDMI_H 20 +#define RST_HDMI_SYS 21 +#define RST_HDMI_AUDIO_DMA 22 + +#endif /* DT_BINDINGS_RST_SUN4I_A10_H */ -- cgit From 581821ae7f7e2c4547945c65f1bcd357f5915aa5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 16 Aug 2017 13:32:39 +0300 Subject: usb: chipidea: udc: Support SKB alignment quirk NVIDIA Tegra20 UDC can't cope with unaligned DMA and require a USB gadget quirk that avoids SKB buffer alignment to be set in order to make Ethernet Gadget working. Later Tegra generations do not require that quirk. Let's add a new platform data flag that allows to enable USB gadget quirk for platforms that require it. Signed-off-by: Dmitry Osipenko Acked-by: Peter Chen Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index c5fdfcf99828..d725cff7268d 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -58,6 +58,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_OVERRIDE_TX_BURST BIT(10) #define CI_HDRC_OVERRIDE_RX_BURST BIT(11) #define CI_HDRC_OVERRIDE_PHY_CONTROL BIT(12) /* Glue layer manages phy */ +#define CI_HDRC_REQUIRES_ALIGNED_DMA BIT(13) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit From 07533c6765de05199417ec73f9c2a495ddd29473 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 21 Aug 2017 17:54:21 +0300 Subject: net/mlx5: Remove a leftover unused variable mlx5_core_wq is no longer being used and should be removed from the code. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d26f18b39c4a..d5b6f6a9fcc5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -890,8 +890,6 @@ static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) return buf->direct.buf + offset; } -extern struct workqueue_struct *mlx5_core_wq; - #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field -- cgit From 667cb65ae5ad01523505d48d6cfd92bd1d3c9785 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 7 Aug 2017 11:14:11 +0300 Subject: net/mlx5: Don't store reserved part in FTEs and FGs The current code stores fte_match_param in the software representation of FTEs and FGs. fte_match_param contains a large reserved area at the bottom of the struct. Since downstream patches are going to hash this part, we would like to avoid doing so on a reserved part. Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f31a0b5377e1..3c7442b56460 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -48,7 +48,7 @@ /* helper macros */ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) -#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned long)(&(__mlx5_nullp(typ)->fld))) +#define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld)) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) -- cgit From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include +#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; -- cgit From ea369ea6d828930c0cf0cacf81f6fd9dff61992d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 23 Aug 2017 02:33:04 +0200 Subject: rtc: remove .open() and .release() There are no driver left using .open and .release. There is no good use case for them as there is nothing the character device interface does that should not be done in the sysfs interface or in-kernel interface. Remove those callbacks now to avoid future confusion. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 0a0f0d14a5fb..e6d0f9c1cafd 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -72,8 +72,6 @@ extern struct class *rtc_class; * issued through ioctl() ... */ struct rtc_class_ops { - int (*open)(struct device *); - void (*release)(struct device *); int (*ioctl)(struct device *, unsigned int, unsigned long); int (*read_time)(struct device *, struct rtc_time *); int (*set_time)(struct device *, struct rtc_time *); -- cgit From ea5311c7e752dbec9bfbdd79992a8772b37f32fa Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 10 Aug 2017 10:54:31 -0600 Subject: PCI: Fix PCIe capability sizes PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 defines the size of the PCIe capability structure for v1 devices with link, but we also have a need in the vfio code for sizing the capability for devices without link, such as Root Complex Integrated Endpoints. Create a separate define for this ending the structure before the link fields. Additionally, this reveals that PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 is currently incorrect, ending the capability length before the v2 link fields. Rename this to specify an RC Integrated Endpoint (no link) capability length and move PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 to include the link fields as we have for the v1 version. Signed-off-by: Alex Williamson [bhelgaas: add "_" in "PCI_CAP_EXP_RC ENDPOINT_SIZEOF_V2 44"] Signed-off-by: Bjorn Helgaas Reviewed-by: Eric Auger --- include/uapi/linux/pci_regs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index c22d3ebaca20..e185d2d39ea6 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -513,6 +513,7 @@ #define PCI_EXP_DEVSTA_URD 0x0008 /* Unsupported Request Detected */ #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ @@ -556,7 +557,7 @@ #define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints end here */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ @@ -639,7 +640,7 @@ #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ -#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ +#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ #define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5.0GT/s */ @@ -647,6 +648,7 @@ #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ #define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ #define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ #define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ -- cgit From f20c4ea49ec4708de97248927ac6138c2d14eba9 Mon Sep 17 00:00:00 2001 From: Dongdong Liu Date: Sat, 19 Aug 2017 17:07:20 +0800 Subject: PCI/DPC: Add eDPC support Add eDPC support. Get and print the RP PIO error information when the trigger condition is RP PIO error. For more information on eDPC, please see PCI Express Base Specification Revision 3.1, section 6.2.10.3, or view the PCI-SIG eDPC ECN here: https://pcisig.com/sites/default/files/specification_documents/ECN_Enhanced_DPC_2012-11-19_final.pdf Signed-off-by: Dongdong Liu Signed-off-by: Bjorn Helgaas Reviewed-by: Keith Busch --- include/uapi/linux/pci_regs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index c22d3ebaca20..1ce96275531c 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -967,6 +967,7 @@ #define PCI_EXP_DPC_CAP_RP_EXT 0x20 /* Root Port Extensions for DPC */ #define PCI_EXP_DPC_CAP_POISONED_TLP 0x40 /* Poisoned TLP Egress Blocking Supported */ #define PCI_EXP_DPC_CAP_SW_TRIGGER 0x80 /* Software Triggering Supported */ +#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0xF00 /* RP PIO log size */ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL 6 /* DPC control */ @@ -980,6 +981,15 @@ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ +#define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ +#define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO MASK */ +#define PCI_EXP_DPC_RP_PIO_SEVERITY 0x14 /* RP PIO Severity */ +#define PCI_EXP_DPC_RP_PIO_SYSERROR 0x18 /* RP PIO SysError */ +#define PCI_EXP_DPC_RP_PIO_EXCEPTION 0x1C /* RP PIO Exception */ +#define PCI_EXP_DPC_RP_PIO_HEADER_LOG 0x20 /* RP PIO Header Log */ +#define PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG 0x30 /* RP PIO ImpSpec Log */ +#define PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG 0x34 /* RP PIO TLP Prefix Log */ + /* Precision Time Measurement */ #define PCI_PTM_CAP 0x04 /* PTM Capability */ #define PCI_PTM_CAP_REQ 0x00000001 /* Requester capable */ -- cgit From 1177009131bee310421f5c04c43d3777cbacbdc8 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:39:59 +0200 Subject: devlink: Add Ethernet header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index ed7687bbf5d0..ddb8b5227580 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -328,6 +328,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); +extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b0e807ac53bb..a855f8dcc8ee 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -226,4 +226,12 @@ enum devlink_dpipe_action_type { DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY, }; +enum devlink_dpipe_field_ethernet_id { + DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, +}; + +enum devlink_dpipe_header_id { + DEVLINK_DPIPE_HEADER_ETHERNET, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit From 3fb886ecea93605a8ea14e258ff3158b8966781e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:00 +0200 Subject: devlink: Add IPv4 header for dpipe This will be used by the IPv4 host table which will be introduced in the following patches. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index ddb8b5227580..8ff8a6f77f29 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -329,6 +329,7 @@ int devlink_dpipe_action_put(struct sk_buff *skb, int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index a855f8dcc8ee..6c172548589d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -230,8 +230,13 @@ enum devlink_dpipe_field_ethernet_id { DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, }; +enum devlink_dpipe_field_ipv4_id { + DEVLINK_DPIPE_FIELD_IPV4_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, + DEVLINK_DPIPE_HEADER_IPV4, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit From ffd3cdccf214cf0df08856a6738544076c4cd548 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:02 +0200 Subject: devlink: Add support for dynamic table size Up until now the dpipe table's size was static and known at registration time. The host table does not have constant size and it is resized in dynamic manner. In order to support this behavior the size is changed to be obtained dynamically via an op. This patch also adjust the current dpipe table for the new API. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 8ff8a6f77f29..e96272b2cfec 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -178,7 +178,6 @@ struct devlink_dpipe_table_ops; * struct devlink_dpipe_table - table object * @priv: private * @name: table name - * @size: maximum number of entries * @counters_enabled: indicates if counters are active * @counter_control_extern: indicates if counter control is in dpipe or * external tool @@ -189,7 +188,6 @@ struct devlink_dpipe_table { void *priv; struct list_head list; const char *name; - u64 size; bool counters_enabled; bool counter_control_extern; struct devlink_dpipe_table_ops *table_ops; @@ -204,6 +202,7 @@ struct devlink_dpipe_table { * @counters_set_update - when changing the counter status hardware sync * maybe needed to allocate/free counter related * resources + * @size_get - get size */ struct devlink_dpipe_table_ops { int (*actions_dump)(void *priv, struct sk_buff *skb); @@ -211,6 +210,7 @@ struct devlink_dpipe_table_ops { int (*entries_dump)(void *priv, bool counters_enabled, struct devlink_dpipe_dump_ctx *dump_ctx); int (*counters_set_update)(void *priv, bool enable); + u64 (*size_get)(void *priv); }; /** @@ -311,8 +311,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern); + void *priv, bool counter_control_extern); void devlink_dpipe_table_unregister(struct devlink *devlink, const char *table_name); int devlink_dpipe_headers_register(struct devlink *devlink, -- cgit From 3580732448f128c39e7325912bc4368ade5dce7d Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 24 Aug 2017 08:40:03 +0200 Subject: devlink: Move dpipe entry clear function into devlink The entry clear routine can be shared between the drivers, thus it is moved inside devlink. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index e96272b2cfec..047a0c54f652 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -323,6 +323,7 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx); int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, struct devlink_dpipe_entry *entry); int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx); +void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry); int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action); int devlink_dpipe_match_put(struct sk_buff *skb, @@ -448,6 +449,11 @@ devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) return 0; } +static inline void +devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) +{ +} + static inline int devlink_dpipe_action_put(struct sk_buff *skb, struct devlink_dpipe_action *action) -- cgit From 0d03510038bda70b5a4a252e8216822e6ce0cbdb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:02 +0200 Subject: netfilter: conntrack: compute l3proto nla size at compile time avoids a pointer and allows struct to be const later on. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 1b8de164d744..6a27ffea7480 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -20,6 +20,9 @@ struct nf_conntrack_l3proto { /* L3 Protocol Family number. ex) PF_INET */ u_int16_t l3proto; + /* size of tuple nlattr, fills a hole */ + u16 nla_size; + /* Protocol name */ const char *name; @@ -49,23 +52,17 @@ struct nf_conntrack_l3proto { int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); - - /* Called when netns wants to use connection tracking */ - int (*net_ns_get)(struct net *); - void (*net_ns_put)(struct net *); - - /* - * Calculate size of tuple nlattr - */ - int (*nlattr_tuple_size)(void); - int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; +#endif - size_t nla_size; + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); /* Module (if any) which this is connected to. */ struct module *me; -- cgit From a3134d537f8209f5b149d7ed9f287047158845f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:03 +0200 Subject: netfilter: conntrack: remove protocol name from l3proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6a27ffea7480..e31861e4fa6a 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -23,9 +23,6 @@ struct nf_conntrack_l3proto { /* size of tuple nlattr, fills a hole */ u16 nla_size; - /* Protocol name */ - const char *name; - /* * Try to fill in the third arg: nhoff is offset of l3 proto * hdr. Return true if possible. -- cgit From 09ec82f5af99d1e35614eb0844b920fc335a313d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:04 +0200 Subject: netfilter: conntrack: remove protocol name from l4proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index b6e27cafb1d9..47c16bae5e00 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -108,9 +108,6 @@ struct nf_conntrack_l4proto { /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); - /* Protocol name */ - const char *name; - /* Module (if any) which this is connected to. */ struct module *me; }; -- cgit From 036c69400a34cf8f6d39c88325dd510462809e7b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:05 +0200 Subject: netfilter: conntrack: reduce size of l4protocol trackers can use u16 for both, shrinks size by another 8 bytes. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 47c16bae5e00..15c58dd3f701 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -92,12 +92,12 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { - size_t obj_size; int (*nlattr_to_obj)(struct nlattr *tb[], struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); - unsigned int nlattr_max; + u16 obj_size; + u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; #endif -- cgit From 91950833dd5a34ac6336aa88da6d43aaeb56ac6d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:06 +0200 Subject: netfilter: conntrack: place print_tuple in procfs part CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function pointer in the trackers for this. Place the printf formatting in the one place that uses it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- include/net/netfilter/nf_conntrack_l4proto.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e31861e4fa6a..dabb53b0913c 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -37,10 +37,6 @@ struct nf_conntrack_l3proto { bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); - /* Print out the per-protocol part of the tuple. */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* * Called before tracking. * *dataoff: offset of protocol header (TCP, UDP,...) in skb diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 15c58dd3f701..7e8da04a5eb6 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,10 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the per-protocol part of the tuple. Return like seq_* */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); -- cgit From ea48cc83cf612fddb4e8868369348b9f936cfedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:07 +0200 Subject: netfilter: conntrack: print_conntrack only needed if CONFIG_NF_CONNTRACK_PROCFS Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7e8da04a5eb6..4976ef92dc78 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,9 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the private part of the conntrack. */ - void (*print_conntrack)(struct seq_file *s, struct nf_conn *); - /* Return the array of timeouts for this protocol. */ unsigned int *(*get_timeouts)(struct net *net); @@ -96,6 +93,10 @@ struct nf_conntrack_l4proto { u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; +#endif +#ifdef CONFIG_NF_CONNTRACK_PROCFS + /* Print out the private part of the conntrack. */ + void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif unsigned int *net_id; /* Init l4proto pernet data */ -- cgit From b3480fe059ac9121b5714205b4ddae14b59ef4be Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:08 +0200 Subject: netfilter: conntrack: make protocol tracker pointers const Doesn't change generated code, but will make it easier to eventually make the actual trackers themselvers const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 6 +++--- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_conntrack_timeout.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index dabb53b0913c..6269deecbee7 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,10 +64,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; /* Protocol global registration. */ -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); -struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); +const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 4976ef92dc78..d4933d56809d 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); -struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index b222957062b5..483d104fa254 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -16,7 +16,7 @@ struct ctnl_timeout { refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; char data[0]; }; -- cgit From 2facaad6000f2322eb40ca379aced31c957f0a41 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:08 +0200 Subject: xdp: make generic xdp redirect use tracepoint trace_xdp_redirect If the xdp_do_generic_redirect() call fails, it trigger the trace_xdp_exception tracepoint. It seems better to use the same tracepoint trace_xdp_redirect, as the native xdp_do_redirect{,_map} does. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7015116331af..d29e58fde364 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -718,7 +718,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * because we only track one map and force a flush when the map changes. * This does not appear to be a real limitation for existing software. */ -int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb); +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog); -- cgit From a873585587205750e7accfb2c93c29239ffa6e09 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:18 +0200 Subject: xdp: remove net_device names from xdp_redirect tracepoint There is too much overhead in the current trace_xdp_redirect tracepoint as it does strcpy and strlen on the net_device names. Besides, exposing the ifindex/index is actually the information that is needed in the tracepoint to diagnose issues. When a lookup fails (either ifindex or devmap index) then there is a need for saying which to_index that have issues. V2: Adjust args to be aligned with trace_xdp_exception. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 0e42e69f773b..cd37706c6f55 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -51,33 +51,33 @@ TRACE_EVENT(xdp_exception, TRACE_EVENT(xdp_redirect, - TP_PROTO(const struct net_device *from, - const struct net_device *to, - const struct bpf_prog *xdp, u32 act, int err), + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, u32 act, + int to_index, int err), - TP_ARGS(from, to, xdp, act, err), + TP_ARGS(dev, xdp, act, to_index, err), TP_STRUCT__entry( - __string(name_from, from->name) - __string(name_to, to->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, ifindex) + __field(int, to_index) __field(int, err) ), TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __assign_str(name_from, from->name); - __assign_str(name_to, to->name); - __entry->act = act; - __entry->err = err; + __entry->act = act; + __entry->ifindex = dev->ifindex; + __entry->to_index = to_index; + __entry->err = err; ), - TP_printk("prog=%s from=%s to=%s action=%s err=%d", + TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d", __print_hex_str(__entry->prog_tag, 8), - __get_str(name_from), __get_str(name_to), __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_index, __entry->err) ); #endif /* _TRACE_XDP_H */ -- cgit From 315ec3990efd71f87e556cf7827a1ac2d565d5e8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 Aug 2017 12:33:23 +0200 Subject: xdp: get tracepoints xdp_exception and xdp_redirect in sync Remove the net_device string name from the xdp_exception tracepoint, like the xdp_redirect tracepoint. Align the TP_STRUCT to have common entries between these two tracepoint. Signed-off-by: Jesper Dangaard Brouer Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index cd37706c6f55..27cf2ef35f5f 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -31,22 +31,22 @@ TRACE_EVENT(xdp_exception, TP_ARGS(dev, xdp, act), TP_STRUCT__entry( - __string(name, dev->name) __array(u8, prog_tag, 8) __field(u32, act) + __field(int, ifindex) ), TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __assign_str(name, dev->name); - __entry->act = act; + __entry->act = act; + __entry->ifindex = dev->ifindex; ), - TP_printk("prog=%s device=%s action=%s", + TP_printk("prog=%s action=%s ifindex=%d", __print_hex_str(__entry->prog_tag, 8), - __get_str(name), - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB)) + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex) ); TRACE_EVENT(xdp_redirect, -- cgit From 498ca3c82a7b11e152a46c253f6b2087c929ce00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Wed, 23 Aug 2017 08:35:40 +0300 Subject: IB/core: Avoid accessing non-allocated memory when inferring port type Commit 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") introduced the concept of type in ah_attr: * During ib_register_device, each port is checked for its type which is stored in ib_device's port_immutable array. * During uverbs' modify_qp, the type is inferred using the port number in ib_uverbs_qp_dest struct (address vector) by accessing the relevant port_immutable array and the type is passed on to providers. IB spec (version 1.3) enforces a valid port value only in Reset to Init. During Init to RTR, the address vector must be valid but port number is not mentioned as a field in the address vector, so its value is not validated, which leads to accesses to a non-allocated memory when inferring the port type. Save the real port number in ib_qp during modify to Init (when the comp_mask indicates that the port number is valid) and use this value to infer the port type. Avoid copying the address vector fields if the matching bit is not set in the attr_mask. Address vector can't be modified before the port, so no valid flow is affected. Fixes: 44c58487d51a ('IB/core: Define 'ib' and 'roce' rdma_ah_attr types') Signed-off-by: Noa Osherovich Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..88c32aba32f7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1683,6 +1683,7 @@ struct ib_qp { enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; + u8 port; }; struct ib_mr { -- cgit From 311fc65c9fb9c966bca8e6f3ff8132ce57344ab9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 Aug 2017 15:13:29 -0500 Subject: pty: Repair TIOCGPTPEER The implementation of TIOCGPTPEER has two issues. When /dev/ptmx (as opposed to /dev/pts/ptmx) is opened the wrong vfsmount is passed to dentry_open. Which results in the kernel displaying the wrong pathname for the peer. The second is simply by caching the vfsmount and dentry of the peer it leaves them open, in a way they were not previously Which because of the inreased reference counts can cause unnecessary behaviour differences resulting in regressions. To fix these move the ioctl into tty_io.c at a generic level allowing the ioctl to have access to the struct file on which the ioctl is being called. This allows the path of the slave to be derived when opening the slave through TIOCGPTPEER instead of requiring the path to the slave be cached. Thus removing the need for caching the path. A new function devpts_ptmx_path is factored out of devpts_acquire and used to implement a function devpts_mntget. The new function devpts_mntget takes a filp to perform the lookup on and fsi so that it can confirm that the superblock that is found by devpts_ptmx_path is the proper superblock. v2: Lots of fixes to make the code actually work v3: Suggestions by Linus - Removed the unnecessary initialization of filp in ptm_open_peer - Simplified devpts_ptmx_path as gotos are no longer required [ This is the fix for the issue that was reverted in commit 143c97cc6529, but this time without breaking 'pbuilder' due to increased reference counts - Linus ] Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Reported-by: Christian Brauner Reported-and-tested-by: Stefan Lippers-Hollmann Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..100cb4343763 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,6 +19,7 @@ struct pts_fs_info; +struct vfsmount *devpts_mntget(struct file *, struct pts_fs_info *); struct pts_fs_info *devpts_acquire(struct file *); void devpts_release(struct pts_fs_info *); @@ -32,6 +33,15 @@ void *devpts_get_priv(struct dentry *); /* unlink */ void devpts_pty_kill(struct dentry *); +/* in pty.c */ +int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags); + +#else +static inline int +ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) +{ + return -EIO; +} #endif -- cgit From 55f2467cd717241dd941153d4db1e23c91aecf98 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 17 Aug 2017 15:50:35 +0300 Subject: RDMA/mlx4: Fix create qp command alignment Avoid extra padding by replacing the order of inl_recv_sz and reserved, otherwise 'mlx4_ib_create_qp' structure might be larger than legacy user input leading to copy of some garbage data from the user space buffer. Fixes: ea30b966f7dd ('IB/mlx4: Add inline-receive support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index d915cab37ec3..21cce1a4c4dd 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -111,8 +111,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u32 inl_recv_sz; __u8 reserved; + __u32 inl_recv_sz; }; struct mlx4_ib_create_wq { -- cgit From dcc9881e6767559c04faf15804ac145a2ea026cb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:36 +0300 Subject: RDMA/(core, ulp): Convert register/unregister event handler to be void The functions ib_register_event_handler() and ib_unregister_event_handler() always returned success and they can't fail. Let's convert those functions to be void, remove redundant checks and cleanup tons of goto statements. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c155c105589d..e536a052e5dd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2413,8 +2413,8 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask, enum rdma_link_layer ll); -int ib_register_event_handler (struct ib_event_handler *event_handler); -int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_register_event_handler(struct ib_event_handler *event_handler); +void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); int ib_query_port(struct ib_device *device, -- cgit From 78b57f9529225111b440e6e5150f52f5d44e3c60 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:37 +0300 Subject: RDMA/core: Cleanup device capability enum Cleanup patch prior exporting the ib_device_cap_flags to the user space. In this patch, we are aligning the indentation, removing IB_DEVICE_INIT_TYPE and IB_DEVICE_RESERVED fields, because it is not used in the kernel. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e536a052e5dd..355c7a328e0b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -170,7 +170,7 @@ enum ib_device_cap_flags { IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), IB_DEVICE_SHUTDOWN_PORT = (1 << 8), - IB_DEVICE_INIT_TYPE = (1 << 9), + /* Not in use, former INIT_TYPE = (1 << 9),*/ IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), @@ -185,7 +185,7 @@ enum ib_device_cap_flags { * which will always contain a usable lkey. */ IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), - IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16), + /* Reserved, old SEND_W_INV = (1 << 16),*/ IB_DEVICE_MEM_WINDOW = (1 << 17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support @@ -220,7 +220,7 @@ enum ib_device_cap_flags { * of I/O operations with single completion queue managed * by hardware. */ - IB_DEVICE_CROSS_CHANNEL = (1 << 27), + IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), -- cgit From 078b3573030346df0cdc46d798c0f434dc53c2cc Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:47 +0300 Subject: IB/mlx4: Fix struct mlx4_ib_create_wq alignment The mlx4 ABI defines to have structures with alignment of 64B. Fixes: 400b1ebcfe31 ("IB/mlx4: Add support for WQ related verbs") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 21cce1a4c4dd..0e10102861b5 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -121,7 +121,6 @@ struct mlx4_ib_create_wq { __u8 log_range_size; __u8 reserved[3]; __u32 comp_mask; - __u32 reserved1; }; struct mlx4_ib_modify_wq { -- cgit From b23673f86fd0d9ccbc088e88e29899b4d3f0f055 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:48 +0300 Subject: IB/mlx4: Remove redundant attribute in mlx4_ib_create_qp_rss struct rx_key_len is not in use and needs to be removed. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx4-abi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 0e10102861b5..c55f60e05f86 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -98,8 +98,7 @@ struct mlx4_ib_create_srq_resp { struct mlx4_ib_create_qp_rss { __u64 rx_hash_fields_mask; __u8 rx_hash_function; - __u8 rx_key_len; - __u8 reserved[6]; + __u8 reserved[7]; __u8 rx_hash_key[40]; __u32 comp_mask; __u32 reserved1; -- cgit From 96dc3fc5f1d66b20cdf839d571c7b907e08d5d00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 17 Aug 2017 15:52:28 +0300 Subject: IB/mlx5: Expose software parsing for Raw Ethernet QP Software parsing (SWP) is a feature that can be used to instruct the device to stop using its internal parser and to parse packets on the transmit path according to offsets set for each packets. Through this feature, the device allows the handling of checksum and LSO by the hardware according to the location of IP and TCP/UDP headers. Enable SW parsing on Raw Ethernet send queue by default if firmware supports it and report these capabilities to user space. Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0b3d30837a9f..64d398e662cd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,22 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_sw_parsing_offloads { + MLX5_IB_SW_PARSING = 1 << 0, + MLX5_IB_SW_PARSING_CSUM = 1 << 1, + MLX5_IB_SW_PARSING_LSO = 1 << 2, +}; + +struct mlx5_ib_sw_parsing_caps { + __u32 sw_parsing_offloads; /* enum mlx5_ib_sw_parsing_offloads */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -177,6 +193,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; + struct mlx5_ib_sw_parsing_caps sw_parsing_caps; }; struct mlx5_ib_create_cq { -- cgit From 8b7ff7f3b301de52924cb2cf3fed47b181893116 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:29 +0300 Subject: IB/mlx5: Enable UMR for MRs created with reg_create This patch is the first step in decoupling UMR usage and allocation from the MR cache. The only functional change in this patch is to enables UMR for MRs created with reg_create. This change fixes a bug where ODP memory regions that were not allocated from the MR cache did not have UMR enabled. Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index db40bc4055c7..99d88624ad07 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,7 +1093,7 @@ enum { }; enum { - MAX_UMR_CACHE_ENTRY = 20, + MR_CACHE_LAST_STD_ENTRY = 20, MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES -- cgit From a550ddfc543e250798048cf4eabe721cd85ac724 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Aug 2017 15:52:33 +0300 Subject: IB/mlx5: Add support for multi underlay QP Set underlay QPN as part of flow rule when it's applicable. There is one root flow table in the NIC RX namespace and all the underlay QPs steer the traffic to this flow table. In order to prevent QP to get traffic which is not target to its underlay QP, we need to set the underlay QP number as part of the steering matching. Note: When multicast traffic is sent the QPN filtering is done by the firmware as some early step. Adding the QPN match on the flow table entry is wrong as by that time the target QPN holds the multicast address (e.g. FF(s)) and it won't match. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5bae70eb25af..6563500c85de 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -295,8 +295,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; + u8 reserved_at_40[0x1a]; + u8 bth_dst_qp[0x1]; - u8 reserved_at_40[0x40]; + u8 reserved_at_5b[0x25]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -432,7 +434,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_at_120[0xe0]; + u8 reserved_at_120[0x28]; + u8 bth_dst_qp[0x18]; + u8 reserved_at_160[0xa0]; }; struct mlx5_ifc_cmd_pas_bits { -- cgit From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:34 +0300 Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports Set the field to allow posting multi packet send WQEs if hardware supports this feature. This doesn't mean the send WQEs will be for multi packet unless the send WQE was prepared according to multi packet send WQE format. User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware supports MPW and allows MPW in SQ context. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6563500c85de..6865e60ba473 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2445,7 +2445,7 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x1]; + u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 64d398e662cd..a61a512e5747 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + MLX5_IB_ALLOW_MPW = 1 << 1, +}; + enum mlx5_ib_sw_parsing_offloads { MLX5_IB_SW_PARSING = 1 << 0, MLX5_IB_SW_PARSING_CSUM = 1 << 1, -- cgit From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:35 +0300 Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability Expose enhanced multi packet WQE capability to user space through query_device by uhw. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6865e60ba473..4eff0b8a1482 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -604,7 +604,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; - u8 reserved_at_1a[0x1]; + u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a61a512e5747..1791bf123ba9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps { enum mlx5_ib_mpw_caps { MPW_RESERVED = 1 << 0, MLX5_IB_ALLOW_MPW = 1 << 1, + MLX5_IB_SUPPORT_EMPW = 1 << 2, }; enum mlx5_ib_sw_parsing_offloads { -- cgit From 22b6722bfa591ba03d6a0c5521b600d4ab2d9a27 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:55:41 +0200 Subject: ipv6: Add sysctl for per namespace flow label reflection Reflecting IPv6 Flow Label at server nodes is useful in environments that employ multipath routing to load balance the requests. As "IPv6 Flow Label Reflection" standard draft [1] points out - ICMPv6 PTB error messages generated in response to a downstream packets from the server can be routed by a load balancer back to the original server without looking at transport headers, if the server applies the flow label reflection. This enables the Path MTU Discovery past the ECMP router in load-balance or anycast environments where each server node is reachable by only one path. Introduce a sysctl to enable flow label reflection per net namespace for all newly created sockets. Same could be earlier achieved only per socket by setting the IPV6_FL_F_REFLECT flag for the IPV6_FLOWLABEL_MGR socket option. [1] https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01 Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0e50bf3ed097..2544f9760a42 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,6 +36,7 @@ struct netns_sysctl_ipv6 { int idgen_retries; int idgen_delay; int flowlabel_state_ranges; + int flowlabel_reflect; }; struct netns_ipv6 { -- cgit From 790c6056686cc4dd5b149b330bbd5ae208d4d721 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2017 18:10:46 -0700 Subject: devlink: Fix devlink_dpipe_table_register() stub signature. One too many arguments compared to the non-stub version. Reported-by: kbuild test robot Fixes: ffd3cdccf214 ("devlink: Add support for dynamic table size") Signed-off-by: David S. Miller --- include/net/devlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 047a0c54f652..aaf7178127a2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -402,8 +402,7 @@ static inline int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern) + void *priv, bool counter_control_extern) { return 0; } -- cgit From 29825717123fb9cfb9e709327d565c2f2fa89903 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:28 +0200 Subject: net: Extend struct flowi6 with multipath hash Allow for functions that fill out the IPv6 flow info to also pass a hash computed over the skb contents. The hash value will drive the multipath routing decisions. This is intended for special treatment of ICMPv6 errors, where we would like to make a routing decision based on the flow identifying the offending IPv6 datagram that triggered the error, rather than the flow of the ICMP error itself. Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/flow.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index f3dc61b29bb5..eb60cee30b44 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -149,6 +149,7 @@ struct flowi6 { #define fl6_ipsec_spi uli.spi #define fl6_mh_type uli.mht.type #define fl6_gre_key uli.gre_key + __u32 mp_hash; } __attribute__((__aligned__(BITS_PER_LONG/8))); struct flowidn { -- cgit From 23aebdacb05dab9efdf22b9e0413491cbd5f128f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:29 +0200 Subject: ipv6: Compute multipath hash for ICMP errors from offending packet When forwarding or sending out an ICMPv6 error, look at the embedded packet that triggered the error and compute a flow hash over its headers. This let's us route the ICMP error together with the flow it belongs to when multipath (ECMP) routing is in use, which in turn makes Path MTU Discovery work in ECMP load-balanced or anycast setups (RFC 7690). Granted, end-hosts behind the ECMP router (aka servers) need to reflect the IPv6 Flow Label for PMTUD to work. The code is organized to be in parallel with ipv4 stack: ip_multipath_l3_keys -> ip6_multipath_l3_keys fib_multipath_hash -> rt6_multipath_hash Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 907d39a42f6b..882bc3c7ccde 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -115,6 +115,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); -- cgit From 2412e927604e0af7e17ae4b688b85a1e87e378fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 11:59:49 -0400 Subject: sunrpc: Const-ify instances of struct svc_xprt_ops Close an attack vector by moving the arrays of server-side transport methods to read-only memory. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ddb7f94a9d06..6a2ad38f5458 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -31,7 +31,7 @@ struct svc_xprt_ops { struct svc_xprt_class { const char *xcl_name; struct module *xcl_owner; - struct svc_xprt_ops *xcl_ops; + const struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; int xcl_ident; @@ -49,7 +49,7 @@ struct svc_xpt_user { struct svc_xprt { struct svc_xprt_class *xpt_class; - struct svc_xprt_ops *xpt_ops; + const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; struct list_head xpt_ready; -- cgit From afea5657c20b11ec9f895ac5cc33b560fb1e0276 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 12:00:06 -0400 Subject: sunrpc: Const-ify struct sv_serv_ops Close an attack vector by moving the arrays of per-server methods to read-only memory. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a3f8af9bd543..38f561b2dda3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -99,7 +99,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - struct svc_serv_ops *sv_ops; /* server operations */ + const struct svc_serv_ops *sv_ops; /* server operations */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, @@ -475,7 +475,7 @@ void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -- cgit From 551143d8d954fe398324a5caa276f518466c428b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Aug 2017 21:12:28 -0700 Subject: net_sched: fix a refcount_t issue with noop_qdisc syzkaller reported a refcount_t warning [1] Issue here is that noop_qdisc refcnt was never really considered as a true refcount, since qdisc_destroy() found TCQ_F_BUILTIN set : if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt))) return; Meaning that all atomic_inc() we did on noop_qdisc.refcnt were not really needed, but harmless until refcount_t came. To fix this problem, we simply need to not increment noop_qdisc.refcnt, since we never decrement it. [1] refcount_t: increment on 0; use-after-free. ------------[ cut here ]------------ WARNING: CPU: 0 PID: 21754 at lib/refcount.c:152 refcount_inc+0x47/0x50 lib/refcount.c:152 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 21754 Comm: syz-executor7 Not tainted 4.13.0-rc6+ #20 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:180 __warn+0x1c4/0x1d9 kernel/panic.c:541 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 RIP: 0010:refcount_inc+0x47/0x50 lib/refcount.c:152 RSP: 0018:ffff8801c43477a0 EFLAGS: 00010282 RAX: 000000000000002b RBX: ffffffff86093c14 RCX: 0000000000000000 RDX: 000000000000002b RSI: ffffffff8159314e RDI: ffffed0038868ee8 RBP: ffff8801c43477a8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff86093ac0 R13: 0000000000000001 R14: ffff8801d0f3bac0 R15: dffffc0000000000 attach_default_qdiscs net/sched/sch_generic.c:792 [inline] dev_activate+0x7d3/0xaa0 net/sched/sch_generic.c:833 __dev_open+0x227/0x330 net/core/dev.c:1380 __dev_change_flags+0x695/0x990 net/core/dev.c:6726 dev_change_flags+0x88/0x140 net/core/dev.c:6792 dev_ifsioc+0x5a6/0x930 net/core/dev_ioctl.c:256 dev_ioctl+0x2bc/0xf90 net/core/dev_ioctl.c:554 sock_do_ioctl+0x94/0xb0 net/socket.c:968 sock_ioctl+0x2c2/0x440 net/socket.c:1058 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 Fixes: 7b9364050246 ("net, sched: convert Qdisc.refcnt from atomic_t to refcount_t") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Reshetova, Elena Signed-off-by: David S. Miller --- include/net/sch_generic.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 67f815e5d525..c1109cdbbfa6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -101,6 +101,13 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; }; +static inline void qdisc_refcount_inc(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + refcount_inc(&qdisc->refcnt); +} + static inline bool qdisc_is_running(const struct Qdisc *qdisc) { return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; -- cgit From 3fd87127073292538047adf1c9c757e9cab0dd56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 14:38:51 -0700 Subject: strparser: initialize all callbacks commit bbb03029a899 ("strparser: Generalize strparser") added more function pointers to 'struct strp_callbacks'; however, kcm_attach() was not updated to initialize them. This could cause the ->lock() and/or ->unlock() function pointers to be set to garbage values, causing a crash in strp_work(). Fix the bug by moving the callback structs into static memory, so unspecified members are zeroed. Also constify them while we're at it. This bug was found by syzkaller, which encountered the following splat: IP: 0x55 PGD 3b1ca067 P4D 3b1ca067 PUD 3b12f067 PMD 0 Oops: 0010 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 1194 Comm: kworker/u8:1 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: kstrp strp_work task: ffff88006bb0e480 task.stack: ffff88006bb10000 RIP: 0010:0x55 RSP: 0018:ffff88006bb17540 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88006ce4bd60 RCX: 0000000000000000 RDX: 1ffff1000d9c97bd RSI: 0000000000000000 RDI: ffff88006ce4bc48 RBP: ffff88006bb17558 R08: ffffffff81467ab2 R09: 0000000000000000 R10: ffff88006bb17438 R11: ffff88006bb17940 R12: ffff88006ce4bc48 R13: ffff88003c683018 R14: ffff88006bb17980 R15: ffff88003c683000 FS: 0000000000000000(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000055 CR3: 000000003c145000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2098 worker_thread+0x223/0x1860 kernel/workqueue.c:2233 kthread+0x35e/0x430 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Code: Bad RIP value. RIP: 0x55 RSP: ffff88006bb17540 CR2: 0000000000000055 ---[ end trace f0e4920047069cee ]--- Here is a C reproducer (requires CONFIG_BPF_SYSCALL=y and CONFIG_AF_KCM=y): #include #include #include #include #include #include #include #include static const struct bpf_insn bpf_insns[3] = { { .code = 0xb7 }, /* BPF_MOV64_IMM(0, 0) */ { .code = 0x95 }, /* BPF_EXIT_INSN() */ }; static const union bpf_attr bpf_attr = { .prog_type = 1, .insn_cnt = 2, .insns = (uintptr_t)&bpf_insns, .license = (uintptr_t)"", }; int main(void) { int bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &bpf_attr, sizeof(bpf_attr)); int inet_fd = socket(AF_INET, SOCK_STREAM, 0); int kcm_fd = socket(AF_KCM, SOCK_DGRAM, 0); ioctl(kcm_fd, SIOCKCMATTACH, &(struct kcm_attach) { .fd = inet_fd, .bpf_fd = bpf_fd }); } Fixes: bbb03029a899 ("strparser: Generalize strparser") Cc: Dmitry Vyukov Cc: Tom Herbert Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- include/net/strparser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/strparser.h b/include/net/strparser.h index 4fe966a0ad92..7dc131d62ad5 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -138,7 +138,7 @@ void strp_done(struct strparser *strp); void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); int strp_init(struct strparser *strp, struct sock *sk, - struct strp_callbacks *cb); + const struct strp_callbacks *cb); void strp_data_ready(struct strparser *strp); int strp_process(struct strparser *strp, struct sk_buff *orig_skb, unsigned int orig_offset, size_t orig_len, -- cgit From 6ae5fa61d27dcb055f4198bcf6c8dbbf1bb33f52 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Aug 2017 15:21:54 -0700 Subject: perf/x86: Fix data source decoding for Skylake Skylake changed the encoding of the PEBS data source field. Some combinations are not available anymore, but some new cases e.g. for L4 cache hit are added. Fix up the conversion table for Skylake, similar as had been done for Nehalem. On Skylake server the encoding for L4 actually means persistent memory. Handle this case too. To properly describe it in the abstracted perf format I had to add some new fields. Since a hit can have only one level add a new field that is an enumeration, not a bit field to describe the level. It can describe any level. Some numbers are also used to describe PMEM and LFB. Also add a new generic remote flag that can be combined with the generic level to signify a remote cache. And there is an extension field for the snoop indication to handle the Forward state. I didn't add a generic flag for hops because it's not needed for Skylake. I changed the existing encodings for older CPUs to also fill in the new level and remote fields. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/20170816222156.19953-3-andi@firstfloor.org Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 642db5fa3286..2a37ae925d85 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -954,14 +954,20 @@ union perf_mem_data_src { mem_snoop:5, /* snoop mode */ mem_lock:2, /* lock instr */ mem_dtlb:7, /* tlb access */ - mem_rsvd:31; + mem_lvl_num:4, /* memory hierarchy level number */ + mem_remote:1, /* remote */ + mem_snoopx:2, /* snoop mode, ext */ + mem_rsvd:24; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:31, + __u64 mem_rsvd:24, + mem_snoopx:2, /* snoop mode, ext */ + mem_remote:1, /* remote */ + mem_lvl_num:4, /* memory hierarchy level number */ mem_dtlb:7, /* tlb access */ mem_lock:2, /* lock instr */ mem_snoop:5, /* snoop mode */ @@ -998,6 +1004,22 @@ union perf_mem_data_src { #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ #define PERF_MEM_LVL_SHIFT 5 +#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ +/* 5-0xa available */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + /* snoop mode */ #define PERF_MEM_SNOOP_NA 0x01 /* not available */ #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ @@ -1006,6 +1028,10 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ #define PERF_MEM_SNOOP_SHIFT 19 +#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ +/* 1 free */ +#define PERF_MEM_SNOOPX_SHIFT 37 + /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -- cgit From 0e709703af5bbc9ea6e75e1f99c2dd0dae261869 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Aug 2017 16:04:50 +0200 Subject: mm, locking/barriers: Clarify tlb_flush_pending() barriers Better document the ordering around tlb_flush_pending(). Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 78 ++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dc1edec05a3f..57378c7cb5f8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -526,30 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -/* - * Memory barriers to keep this state in sync are graciously provided by - * the page table locks, outside of which no page table modifications happen. - * The barriers are used to ensure the order between tlb_flush_pending updates, - * which happen while the lock is not taken, and the PTE updates, which happen - * while the lock is taken, are serialized. - */ -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - /* - * Must be called with PTL held; such that our PTL acquire will have - * observed the store from set_tlb_flush_pending(). - */ - return atomic_read(&mm->tlb_flush_pending) > 0; -} - -/* - * Returns true if there are two above TLB batching threads in parallel. - */ -static inline bool mm_tlb_flush_nested(struct mm_struct *mm) -{ - return atomic_read(&mm->tlb_flush_pending) > 1; -} - static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); @@ -558,7 +534,6 @@ static inline void init_tlb_flush_pending(struct mm_struct *mm) static inline void inc_tlb_flush_pending(struct mm_struct *mm) { atomic_inc(&mm->tlb_flush_pending); - /* * The only time this value is relevant is when there are indeed pages * to flush. And we'll only flush pages after changing them, which @@ -580,24 +555,61 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm) * flush_tlb_range(); * atomic_dec(&mm->tlb_flush_pending); * - * So the =true store is constrained by the PTL unlock, and the =false - * store is constrained by the TLB invalidate. + * Where the increment if constrained by the PTL unlock, it thus + * ensures that the increment is visible if the PTE modification is + * visible. After all, if there is no PTE modification, nobody cares + * about TLB flushes either. + * + * This very much relies on users (mm_tlb_flush_pending() and + * mm_tlb_flush_nested()) only caring about _specific_ PTEs (and + * therefore specific PTLs), because with SPLIT_PTE_PTLOCKS and RCpc + * locks (PPC) the unlock of one doesn't order against the lock of + * another PTL. + * + * The decrement is ordered by the flush_tlb_range(), such that + * mm_tlb_flush_pending() will not return false unless all flushes have + * completed. */ } -/* Clearing is done after a TLB flush, which also provides a barrier. */ static inline void dec_tlb_flush_pending(struct mm_struct *mm) { /* - * Guarantee that the tlb_flush_pending does not not leak into the - * critical section, since we must order the PTE change and changes to - * the pending TLB flush indication. We could have relied on TLB flush - * as a memory barrier, but this behavior is not clearly documented. + * See inc_tlb_flush_pending(). + * + * This cannot be smp_mb__before_atomic() because smp_mb() simply does + * not order against TLB invalidate completion, which is what we need. + * + * Therefore we must rely on tlb_flush_*() to guarantee order. */ - smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } +static inline bool mm_tlb_flush_pending(struct mm_struct *mm) +{ + /* + * Must be called after having acquired the PTL; orders against that + * PTLs release and therefore ensures that if we observe the modified + * PTE we must also observe the increment from inc_tlb_flush_pending(). + * + * That is, it only guarantees to return true if there is a flush + * pending for _this_ PTL. + */ + return atomic_read(&mm->tlb_flush_pending); +} + +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + /* + * Similar to mm_tlb_flush_pending(), we must have acquired the PTL + * for which there is a TLB flush pending in order to guarantee + * we've seen both that PTE modification and the increment. + * + * (no requirement on actually still holding the PTL, that is irrelevant) + */ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + struct vm_fault; struct vm_special_mapping { -- cgit From e6f3faa734a00c606b7b06c6b9f15e5627d3245b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Aug 2017 13:23:30 +0200 Subject: locking/lockdep: Fix workqueue crossrelease annotation The new completion/crossrelease annotations interact unfavourable with the extant flush_work()/flush_workqueue() annotations. The problem is that when a single work class does: wait_for_completion(&C) and complete(&C) in different executions, we'll build dependencies like: lock_map_acquire(W) complete_acquire(C) and lock_map_acquire(W) complete_release(C) which results in the dependency chain: W->C->W, which lockdep thinks spells deadlock, even though there is no deadlock potential since works are ran concurrently. One possibility would be to change the work 'lock' to recursive-read, but that would mean hitting a lockdep limitation on recursive locks. Also, unconditinoally switching to recursive-read here would fail to detect the actual deadlock on single-threaded workqueues, which do have a problem with this. For now, forcefully disregard these locks for crossrelease. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: byungchul.park@lge.com Cc: david@fromorbit.com Cc: johannes@sipsolutions.net Cc: oleg@redhat.com Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 4 ++-- include/linux/lockdep.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5fdd93bb9300..9bc050bc81b2 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -26,7 +26,7 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD); \ + crossrelease_hist_start(XHLOCK_HARD, 0);\ } while (0) # define trace_hardirq_exit() \ do { \ @@ -36,7 +36,7 @@ do { \ # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT); \ + crossrelease_hist_start(XHLOCK_SOFT, 0);\ } while (0) # define lockdep_softirq_exit() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fc827cab6d6e..78bb7133abed 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -18,6 +18,8 @@ extern int lock_stat; #define MAX_LOCKDEP_SUBCLASSES 8UL +#include + #ifdef CONFIG_LOCKDEP #include @@ -578,11 +580,11 @@ extern void lock_commit_crosslock(struct lockdep_map *lock); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), .cross = 0, } -extern void crossrelease_hist_start(enum xhlock_context_t c); +extern void crossrelease_hist_start(enum xhlock_context_t c, bool force); extern void crossrelease_hist_end(enum xhlock_context_t c); extern void lockdep_init_task(struct task_struct *task); extern void lockdep_free_task(struct task_struct *task); -#else +#else /* !CROSSRELEASE */ #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. @@ -591,11 +593,11 @@ extern void lockdep_free_task(struct task_struct *task); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c) {} +static inline void crossrelease_hist_start(enum xhlock_context_t c, bool force) {} static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} -#endif +#endif /* CROSSRELEASE */ #ifdef CONFIG_LOCK_STAT -- cgit From f44d85389e17b2e960620c1c6d89bda978a11f2b Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 24 Aug 2017 16:08:14 +0100 Subject: drm: rename u32 in __u32 in uapi All other fields use __ Cc: Ben Widawsky Fixes: db1689aa61b ("drm: Create a format/modifier blob") Signed-off-by: Lionel Landwerlin Signed-off-by: Daniel Stone Reviewed-by: Chris Wilson Reviewed-by: Emil Velikov Reviewed-by: Ben Widawsky Reviewed-by: Daniel Stone Link: https://patchwork.freedesktop.org/patch/msgid/20170824150814.5878-1-lionel.g.landwerlin@intel.com --- include/uapi/drm/drm_mode.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index a2bb7161f020..54fc38c3c3f1 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -715,24 +715,24 @@ struct drm_mode_atomic { struct drm_format_modifier_blob { #define FORMAT_BLOB_CURRENT 1 /* Version of this blob format */ - u32 version; + __u32 version; /* Flags */ - u32 flags; + __u32 flags; /* Number of fourcc formats supported */ - u32 count_formats; + __u32 count_formats; /* Where in this blob the formats exist (in bytes) */ - u32 formats_offset; + __u32 formats_offset; /* Number of drm_format_modifiers */ - u32 count_modifiers; + __u32 count_modifiers; /* Where in this blob the modifiers exist (in bytes) */ - u32 modifiers_offset; + __u32 modifiers_offset; - /* u32 formats[] */ + /* __u32 formats[] */ /* struct drm_format_modifier modifiers[] */ }; -- cgit From 3d52a7924172adf651eda8f1c95ff38146059307 Mon Sep 17 00:00:00 2001 From: Shreyas NC Date: Wed, 23 Aug 2017 19:33:49 +0530 Subject: ASoC: Intel: uapi: Add new tokens for module common data The module private data can be modelled independent of its instances so that it can be reused by the module instances. So move module data to common manifest which can be referenced by the module instances. This requires new tokens to be defined to accommodate these changes. The new tokens will specify buffer sizes, DSP cycles and respective indexes corresponding to the pcm params in the topology manifest so that driver need not compute them. Signed-off-by: Shreyas NC Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- include/uapi/sound/snd_sst_tokens.h | 92 ++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h index dedb2056160d..f691e421f5e8 100644 --- a/include/uapi/sound/snd_sst_tokens.h +++ b/include/uapi/sound/snd_sst_tokens.h @@ -163,8 +163,71 @@ * * %SKL_TKN_U32_DMA_BUF_SIZE: DMA buffer size in millisec * + * %SKL_TKN_U32_PIPE_DIR: Specifies pipe direction. Can be + * playback/capture. + * + * %SKL_TKN_U32_NUM_CONFIGS: Number of pipe configs + * + * %SKL_TKN_U32_PATH_MEM_PGS: Size of memory (in pages) required for pipeline + * and its data + * + * %SKL_TKN_U32_PIPE_CONFIG_ID: Config id for the modules in the pipe + * and PCM params supported by that pipe + * config. This is used as index to fill + * up the pipe config and module config + * structure. + * + * %SKL_TKN_U32_CFG_FREQ: + * %SKL_TKN_U8_CFG_CHAN: + * %SKL_TKN_U8_CFG_BPS: PCM params (freq, channels, bits per sample) + * supported for each of the pipe configs. + * + * %SKL_TKN_CFG_MOD_RES_ID: Module's resource index for each of the + * pipe config + * + * %SKL_TKN_CFG_MOD_FMT_ID: Module's interface index for each of the + * pipe config + * + * %SKL_TKN_U8_NUM_MOD: Number of modules in the manifest + * + * %SKL_TKN_MM_U8_MOD_IDX: Current index of the module in the manifest + * + * %SKL_TKN_MM_U8_NUM_RES: Number of resources for the module + * + * %SKL_TKN_MM_U8_NUM_INTF: Number of interfaces for the module + * + * %SKL_TKN_MM_U32_RES_ID: Resource index for the resource info to + * be filled into. + * A module can support multiple resource + * configuration and is represnted as a + * resource table. This index is used to + * fill information into appropriate index. + * + * %SKL_TKN_MM_U32_CPS: DSP cycles per second + * + * %SKL_TKN_MM_U32_DMA_SIZE: Allocated buffer size for gateway DMA + * + * %SKL_TKN_MM_U32_CPC: DSP cycles allocated per frame + * + * %SKL_TKN_MM_U32_RES_PIN_ID: Resource pin index in the module + * + * %SKL_TKN_MM_U32_INTF_PIN_ID: Interface index in the module + * + * %SKL_TKN_MM_U32_PIN_BUF: Buffer size of the module pin + * + * %SKL_TKN_MM_U32_FMT_ID: Format index for each of the interface/ + * format information to be filled into. + * + * %SKL_TKN_MM_U32_NUM_IN_FMT: Number of input formats + * %SKL_TKN_MM_U32_NUM_OUT_FMT: Number of output formats + * * module_id and loadable flags dont have tokens as these values will be * read from the DSP FW manifest + * + * Tokens defined can be used either in the manifest or widget private data. + * + * SKL_TKN_MM is used as a suffix for all tokens that represent + * module data in the manifest. */ enum SKL_TKNS { SKL_TKN_UUID = 1, @@ -218,7 +281,34 @@ enum SKL_TKNS { SKL_TKL_U32_D0I3_CAPS, /* Typo added at v4.10 */ SKL_TKN_U32_D0I3_CAPS = SKL_TKL_U32_D0I3_CAPS, SKL_TKN_U32_DMA_BUF_SIZE, - SKL_TKN_MAX = SKL_TKN_U32_DMA_BUF_SIZE, + + SKL_TKN_U32_PIPE_DIRECTION, + SKL_TKN_U32_PIPE_CONFIG_ID, + SKL_TKN_U32_NUM_CONFIGS, + SKL_TKN_U32_PATH_MEM_PGS, + + SKL_TKN_U32_CFG_FREQ, + SKL_TKN_U8_CFG_CHAN, + SKL_TKN_U8_CFG_BPS, + SKL_TKN_CFG_MOD_RES_ID, + SKL_TKN_CFG_MOD_FMT_ID, + SKL_TKN_U8_NUM_MOD, + + SKL_TKN_MM_U8_MOD_IDX, + SKL_TKN_MM_U8_NUM_RES, + SKL_TKN_MM_U8_NUM_INTF, + SKL_TKN_MM_U32_RES_ID, + SKL_TKN_MM_U32_CPS, + SKL_TKN_MM_U32_DMA_SIZE, + SKL_TKN_MM_U32_CPC, + SKL_TKN_MM_U32_RES_PIN_ID, + SKL_TKN_MM_U32_INTF_PIN_ID, + SKL_TKN_MM_U32_PIN_BUF, + SKL_TKN_MM_U32_FMT_ID, + SKL_TKN_MM_U32_NUM_IN_FMT, + SKL_TKN_MM_U32_NUM_OUT_FMT, + + SKL_TKN_MAX = SKL_TKN_MM_U32_NUM_OUT_FMT, }; #endif -- cgit From b8972bf0521ca7ee3c8d27da0fd101fe648acfc2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 25 Aug 2017 00:29:01 +0000 Subject: ASoC: remove duplicate definition of controls/num_controls snd_soc_component and snd_soc_component_driver both have controls/num_controls, but these are duplicated. Let's remove duplicated definition. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index de57a8ad057b..e4b4746032ba 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -862,8 +862,6 @@ struct snd_soc_component { /* Don't use these, use snd_soc_component_get_dapm() */ struct snd_soc_dapm_context dapm; - const struct snd_kcontrol_new *controls; - unsigned int num_controls; const struct snd_soc_dapm_widget *dapm_widgets; unsigned int num_dapm_widgets; const struct snd_soc_dapm_route *dapm_routes; -- cgit From 688d0ebf49a97aa6959aebec5d177a8b821f27d1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 25 Aug 2017 00:29:20 +0000 Subject: ASoC: remove duplicate definition of dapm_widgets/num_dapm_widgets snd_soc_component and snd_soc_component_driver both have dapm_widgets/num_dapm_widgets, but these are duplicated. Let's remove duplicated definition. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index e4b4746032ba..3e0070ed6883 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -862,8 +862,6 @@ struct snd_soc_component { /* Don't use these, use snd_soc_component_get_dapm() */ struct snd_soc_dapm_context dapm; - const struct snd_soc_dapm_widget *dapm_widgets; - unsigned int num_dapm_widgets; const struct snd_soc_dapm_route *dapm_routes; unsigned int num_dapm_routes; struct snd_soc_codec *codec; -- cgit From 6969b2bae6dbe7f277a7c018b66d2f3ad662129f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 25 Aug 2017 00:29:41 +0000 Subject: ASoC: remove duplicate definition of dapm_routes/num_dapm_routes snd_soc_component and snd_soc_component_driver both have dapm_routes/num_dapm_routes, but these are duplicated. Let's remove duplicated definition. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 3e0070ed6883..2bcb38045dc1 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -862,8 +862,6 @@ struct snd_soc_component { /* Don't use these, use snd_soc_component_get_dapm() */ struct snd_soc_dapm_context dapm; - const struct snd_soc_dapm_route *dapm_routes; - unsigned int num_dapm_routes; struct snd_soc_codec *codec; int (*probe)(struct snd_soc_component *); -- cgit From 5237e95f63761477b7ea45499d08b89383a77eab Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Jul 2017 18:29:27 +0100 Subject: dma-mapping: reduce dma_mapping_error inline bloat Thanks to the nested inlining, all drivers correctly calling dma_mapping_error() after a mapping a page or single buffer generate two calls to get_arch_dma_ops() per callsite, which all adds up to a fair old chunk of useless code, e.g. ~3KB for an arm64 defconfig plus extras: text data bss dec hex filename 13051391 1503898 327768 14883057 e318f1 vmlinux.o.old 13050751 1503898 327768 14882417 e31671 vmlinux.o.new Give the compiler a hand by making it clear we want the same ops. Signed-off-by: Robin Murphy Reviewed-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 03c0196a6f24..66d8ea68f40b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -565,10 +565,11 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - debug_dma_mapping_error(dev, dma_addr); + const struct dma_map_ops *ops = get_dma_ops(dev); - if (get_dma_ops(dev)->mapping_error) - return get_dma_ops(dev)->mapping_error(dev, dma_addr); + debug_dma_mapping_error(dev, dma_addr); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); return 0; } -- cgit From 30d6e0a4190d37740e9447e4e4815f06992dd8c3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 24 Aug 2017 09:31:05 +0200 Subject: futex: Remove duplicated code and fix undefined behaviour There is code duplicated over all architecture's headers for futex_atomic_op_inuser. Namely op decoding, access_ok check for uaddr, and comparison of the result. Remove this duplication and leave up to the arches only the needed assembly which is now in arch_futex_atomic_op_inuser. This effectively distributes the Will Deacon's arm64 fix for undefined behaviour reported by UBSAN to all architectures. The fix was done in commit 5f16a046f8e1 (arm64: futex: Fix undefined behaviour with FUTEX_OP_OPARG_SHIFT usage). Look there for an example dump. And as suggested by Thomas, check for negative oparg too, because it was also reported to cause undefined behaviour report. Note that s390 removed access_ok check in d12a29703 ("s390/uaccess: remove pointless access_ok() checks") as access_ok there returns true. We introduce it back to the helper for the sake of simplicity (it gets optimized away anyway). Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Acked-by: Russell King Acked-by: Michael Ellerman (powerpc) Acked-by: Heiko Carstens [s390] Acked-by: Chris Metcalf [for tile] Reviewed-by: Darren Hart (VMware) Reviewed-by: Will Deacon [core/arm64] Cc: linux-mips@linux-mips.org Cc: Rich Felker Cc: linux-ia64@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: peterz@infradead.org Cc: Benjamin Herrenschmidt Cc: Max Filippov Cc: Paul Mackerras Cc: sparclinux@vger.kernel.org Cc: Jonas Bonn Cc: linux-s390@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Yoshinori Sato Cc: linux-hexagon@vger.kernel.org Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Catalin Marinas Cc: Matt Turner Cc: linux-snps-arc@lists.infradead.org Cc: Fenghua Yu Cc: Arnd Bergmann Cc: linux-xtensa@linux-xtensa.org Cc: Stefan Kristiansson Cc: openrisc@lists.librecores.org Cc: Ivan Kokshaysky Cc: Stafford Horne Cc: linux-arm-kernel@lists.infradead.org Cc: Richard Henderson Cc: Chris Zankel Cc: Michal Simek Cc: Tony Luck Cc: linux-parisc@vger.kernel.org Cc: Vineet Gupta Cc: Ralf Baechle Cc: Richard Kuo Cc: linux-alpha@vger.kernel.org Cc: Martin Schwidefsky Cc: linuxppc-dev@lists.ozlabs.org Cc: "David S. Miller" Link: http://lkml.kernel.org/r/20170824073105.3901-1-jslaby@suse.cz --- include/asm-generic/futex.h | 50 ++++++++------------------------------------- 1 file changed, 9 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index bf2d34c9d804..f0d8b1c51343 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -13,7 +13,7 @@ */ /** - * futex_atomic_op_inuser() - Atomic arithmetic operation with constant + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant * argument and comparison of the previous * futex value with another constant. * @@ -25,18 +25,11 @@ * <0 - On error */ static inline int -futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) { - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; int oldval, ret; u32 tmp; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - preempt_disable(); pagefault_disable(); @@ -74,17 +67,9 @@ out_pagefault_enable: pagefault_enable(); preempt_enable(); - if (ret == 0) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } + if (ret == 0) + *oval = oldval; + return ret; } @@ -126,18 +111,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #else static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) { - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; int oldval = 0, ret; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) - return -EFAULT; pagefault_disable(); @@ -153,17 +129,9 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) pagefault_enable(); - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } + if (!ret) + *oval = oldval; + return ret; } -- cgit From f52d53a9a0e08e3a003f0f3ff8e163125319af5a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Aug 2017 13:46:27 -0700 Subject: scsi: Remove an obsolete function declaration Commit e9c787e65c0c ("scsi: allocate scsi_cmnd structures as part of struct request") removed the scsi_get_command() function. Hence also remove the declaration of that function. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index a1266d318c85..f5afcff8d76f 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -158,7 +158,6 @@ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) return *(struct scsi_driver **)cmd->request->rq_disk->private_data; } -extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t); extern void scsi_put_command(struct scsi_cmnd *); extern void scsi_finish_command(struct scsi_cmnd *cmd); -- cgit From c2e872a27c610a5e2a71444f86470f765ed9dfce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Aug 2017 13:46:28 -0700 Subject: scsi: Avoid sign extension of scsi_device.type This patch avoids that smatch reports the following: drivers/scsi/scsi_sysfs.c:506 scsi_bus_uevent() warn: argument 3 to %02x specifier has type 'char' drivers/scsi/scsi_sysfs.c:872 sdev_show_modalias() warn: argument 4 to %02x specifier has type 'char' Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 0979a5f3b69a..f054f3f43c75 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -111,7 +111,7 @@ struct scsi_device { unsigned sector_size; /* size in bytes */ void *hostdata; /* available to low-level driver */ - char type; + unsigned char type; char scsi_level; char inq_periph_qual; /* PQ from INQUIRY data */ struct mutex inquiry_mutex; -- cgit From bed2213d01de474eb8a6f3891070eec6be6fe772 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Aug 2017 13:46:32 -0700 Subject: scsi: Use blk_mq_rq_to_pdu() to convert a request to a SCSI command pointer Since commit e9c787e65c0c ("scsi: allocate scsi_cmnd structures as part of struct request") struct request and struct scsi_cmnd are adjacent. This means that there is now an alternative to reading req->special to convert a pointer to a prepared request into a SCSI command pointer, namely by using blk_mq_rq_to_pdu(). Make this change where appropriate. Although this patch does not change any functionality, it slightly improves performance and slightly improves readability. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_tcq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 4416b1026189..5b416debf101 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -39,7 +39,7 @@ static inline struct scsi_cmnd *scsi_host_find_tag(struct Scsi_Host *shost, if (!req) return NULL; - return req->special; + return blk_mq_rq_to_pdu(req); } #endif /* CONFIG_BLOCK */ -- cgit From 85aa93be7558b009fd8b9f0171d8b8a2b44397f7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Aug 2017 13:46:41 -0700 Subject: scsi: scsi_transport_srp: Suppress a W=1 compiler warning Avoid that the following compiler warning is reported when building with W=1: drivers/scsi/scsi_transport_srp.c:92:19: warning: comparison is always false due to limited range of data type [-Wtype-limits] Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_srp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index dd096330734e..56ae198acc73 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -111,7 +111,7 @@ extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, - int dev_loss_tmo); + long dev_loss_tmo); int srp_parse_tmo(int *tmo, const char *buf); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); -- cgit From cc199e78460565eeab0399875dbf9da8e2901c42 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 Aug 2017 13:57:02 +0200 Subject: scsi: libsas: move bus_reset_handler() to target_reset_handler() The bus reset handler is calling I_T Nexus reset, which logically is a target reset as it need to specify both the initiator and the target. So move it to target reset. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index cfaeed256ab2..e7c012ce5ecd 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -714,7 +714,7 @@ void sas_init_dev(struct domain_device *); void sas_task_abort(struct sas_task *); int sas_eh_abort_handler(struct scsi_cmnd *cmd); int sas_eh_device_reset_handler(struct scsi_cmnd *cmd); -int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd); +int sas_eh_target_reset_handler(struct scsi_cmnd *cmd); extern void sas_target_destroy(struct scsi_target *); extern int sas_slave_alloc(struct scsi_device *); -- cgit From fbb113f773482496d601e0bd934e680b35876016 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 29 Jun 2017 15:12:24 +0200 Subject: i40e/i40evf: rename vf_offload_flags to vf_cap_flags in struct virtchnl_vf_resource The current name of vf_offload_flags indicates that the bitmap is limited to offload related features. Make this more generic by renaming it to vf_cap_flags, which allows for other capabilities besides offloading to be added. Signed-off-by: Stefan Assmann Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index c893b9520a67..becfca2ae94e 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -223,7 +223,7 @@ struct virtchnl_vsi_resource { VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); -/* VF offload flags +/* VF capability flags * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including * TX/RX Checksum offloading and TSO for non-tunnelled packets. */ @@ -251,7 +251,7 @@ struct virtchnl_vf_resource { u16 max_vectors; u16 max_mtu; - u32 vf_offload_flags; + u32 vf_cap_flags; u32 rss_key_size; u32 rss_lut_size; -- cgit From 32d99d0b670299720dd0db92a974c9612c230889 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Fri, 25 Aug 2017 09:56:44 +0200 Subject: ipv6: sr: add support for ip4ip6 encapsulation This patch enables the SRv6 encapsulation mode to carry an IPv4 payload. All the infrastructure was already present, I just had to add a parameter to seg6_do_srh_encap() to specify the inner packet protocol, and perform some additional checks. Usage example: ip route add 1.2.3.4 encap seg6 mode encap segs fc00::1,fc00::2 dev eth0 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/seg6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index 5379f550f521..099bad59dc90 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -60,7 +60,8 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); -extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif -- cgit From 38ee7f2d47565689f35662d488d25e7afc43477d Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Fri, 25 Aug 2017 09:56:45 +0200 Subject: ipv6: sr: add support for encapsulation of L2 frames This patch implements the L2 frame encapsulation mechanism, referred to as T.Encaps.L2 in the SRv6 specifications [1]. A new type of SRv6 tunnel mode is added (SEG6_IPTUN_MODE_L2ENCAP). It only accepts packets with an existing MAC header (i.e., it will not work for locally generated packets). The resulting packet looks like IPv6 -> SRH -> Ethernet -> original L3 payload. The next header field of the SRH is set to NEXTHDR_NONE. [1] https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-01 Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/uapi/linux/seg6_iptunnel.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index b6e5a0a1afd7..b23df9f58354 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -33,16 +33,26 @@ struct seg6_iptunnel_encap { enum { SEG6_IPTUN_MODE_INLINE, SEG6_IPTUN_MODE_ENCAP, + SEG6_IPTUN_MODE_L2ENCAP, }; #ifdef __KERNEL__ static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { - int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); - - return ((tuninfo->srh->hdrlen + 1) << 3) + - (encap * sizeof(struct ipv6hdr)); + int head = 0; + + switch (tuninfo->mode) { + case SEG6_IPTUN_MODE_INLINE: + break; + case SEG6_IPTUN_MODE_ENCAP: + head = sizeof(struct ipv6hdr); + break; + case SEG6_IPTUN_MODE_L2ENCAP: + return 0; + } + + return ((tuninfo->srh->hdrlen + 1) << 3) + head; } #endif -- cgit From ebfa00c5745660fe7f0a91eea88d4dff658486c4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 25 Aug 2017 13:10:12 +0200 Subject: tcp: fix refcnt leak with ebpf congestion control There are a few bugs around refcnt handling in the new BPF congestion control setsockopt: - The new ca is assigned to icsk->icsk_ca_ops even in the case where we cannot get a reference on it. This would lead to a use after free, since that ca is going away soon. - Changing the congestion control case doesn't release the refcnt on the previous ca. - In the reinit case, we first leak a reference on the old ca, then we call tcp_reinit_congestion_control on the ca that we have just assigned, leading to deinitializing the wrong ca (->release of the new ca on the old ca's data) and releasing the refcount on the ca that we actually want to use. This is visible by building (for example) BIC as a module and setting net.ipv4.tcp_congestion_control=bic, and using tcp_cong_kern.c from samples/bpf. This patch fixes the refcount issues, and moves reinit back into tcp core to avoid passing a ca pointer back to BPF. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Sabrina Dubroca Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller --- include/net/tcp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index ada65e767b28..f642a39f9eee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1004,9 +1004,7 @@ void tcp_get_default_congestion_control(char *name); void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load); -void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca); +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); -- cgit From 143976ce992fcf3bfc0f4d15d5726bb492dcf262 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:29 -0700 Subject: net_sched: remove tc class reference counting For TC classes, their ->get() and ->put() are always paired, and the reference counting is completely useless, because: 1) For class modification and dumping paths, we already hold RTNL lock, so all of these ->get(),->change(),->put() are atomic. 2) For filter bindiing/unbinding, we use other reference counter than this one, and they should have RTNL lock too. 3) For ->qlen_notify(), it is special because it is called on ->enqueue() path, but we already hold qdisc tree lock there, and we hold this tree lock when graft or delete the class too, so it should not be gone or changed until we release the tree lock. Therefore, this patch removes ->get() and ->put(), but: 1) Adds a new ->find() to find the pointer to a class by classid, no refcnt. 2) Move the original class destroy upon the last refcnt into ->delete(), right after releasing tree lock. This is fine because the class is already removed from hash when holding the lock. For those who also use ->put() as ->unbind(), just rename them to reflect this change. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1688f0f6c7ba..d817585aa5df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -147,8 +147,7 @@ struct Qdisc_class_ops { void (*qlen_notify)(struct Qdisc *, unsigned long); /* Class manipulation routines */ - unsigned long (*get)(struct Qdisc *, u32 classid); - void (*put)(struct Qdisc *, unsigned long); + unsigned long (*find)(struct Qdisc *, u32 classid); int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *); int (*delete)(struct Qdisc *, unsigned long); -- cgit From 3cd904ecbb5d0bcf36dfca7e726bdfd6d3644334 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 24 Aug 2017 16:51:30 -0700 Subject: net_sched: kill u32_node pointer in Qdisc It is ugly to hide a u32-filter-specific pointer inside Qdisc, this breaks the TC layers: 1. Qdisc is a generic representation, should not have any specific data of any type 2. Qdisc layer is above filter layer, should only save filters in the list of struct tcf_proto. This pointer is used as the head of the chain of u32 hash tables, that is struct tc_u_hnode, because u32 filter is very special, it allows to create multiple hash tables within one qdisc and across multiple u32 filters. Instead of using this ugly pointer, we can just save it in a global hash table key'ed by (dev ifindex, qdisc handle), therefore we can still treat it as a per qdisc basis data structure conceptually. Of course, because of network namespaces, this key is not unique at all, but it is fine as we already have a pointer to Qdisc in struct tc_u_common, we can just compare the pointers when collision. And this only affects slow paths, has no impact to fast path, thanks to the pointer ->tp_c. Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d817585aa5df..c30b634c5f82 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -75,7 +75,6 @@ struct Qdisc { struct hlist_node hash; u32 handle; u32 parent; - void *u32_node; struct netdev_queue *dev_queue; -- cgit From 64f0f5d18a47c703c85576375cc010e83dac6a48 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 25 Aug 2017 14:31:01 +0200 Subject: udp6: set rx_dst_cookie on rx_dst updates Currently, in the udp6 code, the dst cookie is not initialized/updated concurrently with the RX dst used by early demux. As a result, the dst_check() in the early_demux path always fails, the rx dst cache is always invalidated, and we can't really leverage significant gain from the demux lookup. Fix it adding udp6 specific variant of sk_rx_dst_set() and use it to set the dst cookie when the dst entry is really changed. The issue is there since the introduction of early demux for ipv6. Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast") Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 586de4b811b5..626c2d8a70c5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -260,7 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, } void udp_v4_early_demux(struct sk_buff *skb); -void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); +bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); -- cgit From ccc829ba3624beb9a703fc995d016b836d9eead8 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 25 Aug 2017 16:50:15 +0100 Subject: efi/libstub: Enable reset attack mitigation If a machine is reset while secrets are present in RAM, it may be possible for code executed after the reboot to extract those secrets from untouched memory. The Trusted Computing Group specified a mechanism for requesting that the firmware clear all RAM on reset before booting another OS. This is done by setting the MemoryOverwriteRequestControl variable at startup. If userspace can ensure that all secrets are removed as part of a controlled shutdown, it can reset this variable to 0 before triggering a hardware reboot. Signed-off-by: Matthew Garrett Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170825155019.6740-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4e47f78430be..c241acca0b15 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1504,6 +1504,13 @@ enum efi_secureboot_mode { }; enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); +#ifdef CONFIG_RESET_ATTACK_MITIGATION +void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg); +#else +static inline void +efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) { } +#endif + /* * Arch code can implement the following three template macros, avoiding * reptition for the void/non-void return cases of {__,}efi_call_virt(): -- cgit From c2ceb5fd4e921506e86208b82fca716a2c3aad59 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 25 Aug 2017 16:50:16 +0100 Subject: efi/random: Increase size of firmware supplied randomness The crng code requires at least 64 bytes (2 * CHACHA20_BLOCK_SIZE) to complete the fast boot-time init, so provide that many bytes when invoking UEFI protocols to seed the entropy pool. Also, add a notice so we can tell from the boot log when the seeding actually took place. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170825155019.6740-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index c241acca0b15..33d41df062bc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1571,6 +1571,8 @@ efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, void *priv, efi_exit_boot_map_processing priv_func); +#define EFI_RANDOM_SEED_SIZE 64U + struct linux_efi_random_seed { u32 size; u8 bits[]; -- cgit From 28e11b15b6606c3308f87f7c9c4c9e404eddde6d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 20 Aug 2017 06:53:10 -0400 Subject: media: cec: replace pin->cur_value by adap->cec_pin_is_high The current CEC pin value (0 or 1) was part of the cec_pin struct, but that assumes that CEC pin monitoring can only be used with a driver that uses the low-level CEC pin framework. But hardware that has both a high-level API and can monitor the CEC pin at low-level at the same time does not need to depend on the cec pin framework. To support such devices remove the cur_value field from struct cec_pin and add a cec_pin_is_high field to cec_adapter. This also makes it possible to drop the '#ifdef CONFIG_CEC_PIN' in cec-api.c. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/cec-pin.h | 1 - include/media/cec.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/cec-pin.h b/include/media/cec-pin.h index d28d07fa312e..f09cc9579d53 100644 --- a/include/media/cec-pin.h +++ b/include/media/cec-pin.h @@ -128,7 +128,6 @@ struct cec_pin { u16 la_mask; bool enabled; bool monitor_all; - bool cur_value; bool rx_eom; bool enable_irq_failed; enum cec_pin_state state; diff --git a/include/media/cec.h b/include/media/cec.h index 60b26fc18464..df6b3bd31284 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -180,6 +180,7 @@ struct cec_adapter { bool needs_hpd; bool is_configuring; bool is_configured; + bool cec_pin_is_high; u32 monitor_all_cnt; u32 monitor_pin_cnt; u32 follower_cnt; -- cgit From 1526c704b3c32640b5e6cdc1662b0698603e9d4f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 14 Aug 2017 06:22:14 -0400 Subject: media: v4l: fwnode: The clock lane is the first lane in lane_polarities The clock lane is the first lane in the lane_polarities array. Reflect this consistently by putting the number of data lanes after the number of clock lanes. Fixes: 4ee236219f6d ("media: v4l2-fwnode: suppress a warning at OF parsing logic") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index cb34dcb0bb65..08e743fb7944 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -42,7 +42,7 @@ struct v4l2_fwnode_bus_mipi_csi2 { unsigned char data_lanes[MAX_DATA_LANES]; unsigned char clock_lane; unsigned short num_data_lanes; - bool lane_polarities[MAX_DATA_LANES + 1]; + bool lane_polarities[1 + MAX_DATA_LANES]; }; /** -- cgit From ad3cdf3e1f37299fc1422f7828243635249a3dde Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 14 Aug 2017 06:43:07 -0400 Subject: media: v4l: fwnode: Use a less clash-prone name for MAX_DATA_LANES macro Avoid using a generic name such as MAX_DATA_LANES in a header file widely included in drivers. Instead, call it V4L2_FWNODE_CSI2_MAX_DATA_LANES. Fixes: 4ee236219f6d ("media: v4l2-fwnode: suppress a warning at OF parsing logic") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-fwnode.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h index 08e743fb7944..7adec9851d9e 100644 --- a/include/media/v4l2-fwnode.h +++ b/include/media/v4l2-fwnode.h @@ -26,7 +26,7 @@ struct fwnode_handle; -#define MAX_DATA_LANES 4 +#define V4L2_FWNODE_CSI2_MAX_DATA_LANES 4 /** * struct v4l2_fwnode_bus_mipi_csi2 - MIPI CSI-2 bus data structure @@ -39,10 +39,10 @@ struct fwnode_handle; */ struct v4l2_fwnode_bus_mipi_csi2 { unsigned int flags; - unsigned char data_lanes[MAX_DATA_LANES]; + unsigned char data_lanes[V4L2_FWNODE_CSI2_MAX_DATA_LANES]; unsigned char clock_lane; unsigned short num_data_lanes; - bool lane_polarities[1 + MAX_DATA_LANES]; + bool lane_polarities[1 + V4L2_FWNODE_CSI2_MAX_DATA_LANES]; }; /** -- cgit From 5b6f9abe5a49df81737fbbfba890ee5b093f8168 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Mon, 21 Aug 2017 07:34:10 -0400 Subject: media: vb2: add bidirectional flag in vb2_queue This change is intended to give to the v4l2 drivers a choice to change the default behavior of the v4l2-core DMA mapping direction from DMA_TO/FROM_DEVICE (depending on the buffer type CAPTURE or OUTPUT) to DMA_BIDIRECTIONAL during queue_init time. Initially the issue with DMA mapping direction has been found in Venus encoder driver where the hardware (firmware side) adds few lines padding on bottom of the image buffer, and the consequence is triggering of IOMMU protection faults. This will help supporting venus encoder (and probably other drivers in the future) which wants to map output type of buffers as read/write. Signed-off-by: Stanimir Varbanov Acked-by: Marek Szyprowski Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/videobuf2-core.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index cb97c224be73..ef9b64398c8c 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -427,6 +427,16 @@ struct vb2_buf_ops { * @dev: device to use for the default allocation context if the driver * doesn't fill in the @alloc_devs array. * @dma_attrs: DMA attributes to use for the DMA. + * @bidirectional: when this flag is set the DMA direction for the buffers of + * this queue will be overridden with DMA_BIDIRECTIONAL direction. + * This is useful in cases where the hardware (firmware) writes to + * a buffer which is mapped as read (DMA_TO_DEVICE), or reads from + * buffer which is mapped for write (DMA_FROM_DEVICE) in order + * to satisfy some internal hardware restrictions or adds a padding + * needed by the processing algorithm. In case the DMA mapping is + * not bidirectional but the hardware (firmware) trying to access + * the buffer (in the opposite direction) this could lead to an + * IOMMU protection faults. * @fileio_read_once: report EOF after reading the first buffer * @fileio_write_immediately: queue buffer after each write() call * @allow_zero_bytesused: allow bytesused == 0 to be passed to the driver @@ -465,6 +475,7 @@ struct vb2_buf_ops { * Private elements (won't appear at the uAPI book): * @mmap_lock: private mutex used when buffers are allocated/freed/mmapped * @memory: current memory type used + * @dma_dir: DMA mapping direction. * @bufs: videobuf buffer structures * @num_buffers: number of allocated/used buffers * @queued_list: list of buffers currently queued from userspace @@ -495,6 +506,7 @@ struct vb2_queue { unsigned int io_modes; struct device *dev; unsigned long dma_attrs; + unsigned bidirectional:1; unsigned fileio_read_once:1; unsigned fileio_write_immediately:1; unsigned allow_zero_bytesused:1; @@ -516,6 +528,7 @@ struct vb2_queue { /* private: internal use only */ struct mutex mmap_lock; unsigned int memory; + enum dma_data_direction dma_dir; struct vb2_buffer *bufs[VB2_MAX_FRAME]; unsigned int num_buffers; -- cgit From d1b3437ed780cd97b4b1300db96a4d8faae6fea1 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 8 Aug 2017 09:29:58 -0400 Subject: media: v4l: Add packed Bayer raw12 pixel formats These formats are compressed 12-bit raw bayer formats with four different pixel orders. They are similar to 10-bit variants. The formats added by this patch are V4L2_PIX_FMT_SBGGR12P V4L2_PIX_FMT_SGBRG12P V4L2_PIX_FMT_SGRBG12P V4L2_PIX_FMT_SRGGB12P Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 45cf7359822c..185d6a0acc06 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -603,6 +603,11 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12 GBGB.. RGRG.. */ #define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12 GRGR.. BGBG.. */ #define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12 RGRG.. GBGB.. */ + /* 12bit raw bayer packed, 6 bytes for every 4 pixels */ +#define V4L2_PIX_FMT_SBGGR12P v4l2_fourcc('p', 'B', 'C', 'C') +#define V4L2_PIX_FMT_SGBRG12P v4l2_fourcc('p', 'G', 'C', 'C') +#define V4L2_PIX_FMT_SGRBG12P v4l2_fourcc('p', 'g', 'C', 'C') +#define V4L2_PIX_FMT_SRGGB12P v4l2_fourcc('p', 'R', 'C', 'C') #define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16 BGBG.. GRGR.. */ #define V4L2_PIX_FMT_SGBRG16 v4l2_fourcc('G', 'B', '1', '6') /* 16 GBGB.. RGRG.. */ #define V4L2_PIX_FMT_SGRBG16 v4l2_fourcc('G', 'R', '1', '6') /* 16 GRGR.. BGBG.. */ -- cgit From 503dd28af108888c505e8d6a86f4acf5eb20f3b7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 18 Jul 2017 09:26:59 -0400 Subject: media: v4l2-flash-led-class: Create separate sub-devices for indicators The V4L2 flash interface allows controlling multiple LEDs through a single sub-devices if, and only if, these LEDs are of different types. This approach scales badly for flash controllers that drive multiple flash LEDs or for LED specific associations. Essentially, the original assumption of a LED driver chip that drives a single flash LED and an indicator LED is no longer valid. Address the matter by registering one sub-device per LED. Signed-off-by: Sakari Ailus Reviewed-by: Jacek Anaszewski Acked-by: Pavel Machek Reviewed-by: Rui Miguel Silva (for greybus/light) Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-flash-led-class.h | 40 +++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-flash-led-class.h b/include/media/v4l2-flash-led-class.h index 54e31a805a88..1b515166ad60 100644 --- a/include/media/v4l2-flash-led-class.h +++ b/include/media/v4l2-flash-led-class.h @@ -56,8 +56,7 @@ struct v4l2_flash_ops { * struct v4l2_flash_config - V4L2 Flash sub-device initialization data * @dev_name: the name of the media entity, * unique in the system - * @torch_intensity: constraints for the LED in torch mode - * @indicator_intensity: constraints for the indicator LED + * @intensity: non-flash strobe constraints for the LED * @flash_faults: bitmask of flash faults that the LED flash class * device can report; corresponding LED_FAULT* bit * definitions are available in the header file @@ -66,8 +65,7 @@ struct v4l2_flash_ops { */ struct v4l2_flash_config { char dev_name[32]; - struct led_flash_setting torch_intensity; - struct led_flash_setting indicator_intensity; + struct led_flash_setting intensity; u32 flash_faults; unsigned int has_external_strobe:1; }; @@ -110,8 +108,6 @@ static inline struct v4l2_flash *v4l2_ctrl_to_v4l2_flash(struct v4l2_ctrl *c) * @dev: flash device, e.g. an I2C device * @fwn: fwnode_handle of the LED, may be NULL if the same as device's * @fled_cdev: LED flash class device to wrap - * @iled_cdev: LED flash class device representing indicator LED associated - * with fled_cdev, may be NULL * @ops: V4L2 Flash device ops * @config: initialization data for V4L2 Flash sub-device * @@ -124,9 +120,24 @@ static inline struct v4l2_flash *v4l2_ctrl_to_v4l2_flash(struct v4l2_ctrl *c) struct v4l2_flash *v4l2_flash_init( struct device *dev, struct fwnode_handle *fwn, struct led_classdev_flash *fled_cdev, - struct led_classdev *iled_cdev, - const struct v4l2_flash_ops *ops, - struct v4l2_flash_config *config); + const struct v4l2_flash_ops *ops, struct v4l2_flash_config *config); + +/** + * v4l2_flash_indicator_init - initialize V4L2 indicator sub-device + * @dev: flash device, e.g. an I2C device + * @fwn: fwnode_handle of the LED, may be NULL if the same as device's + * @iled_cdev: LED flash class device representing the indicator LED + * @config: initialization data for V4L2 Flash sub-device + * + * Create V4L2 Flash sub-device wrapping given LED subsystem device. + * + * Returns: A valid pointer, or, when an error occurs, the return + * value is encoded using ERR_PTR(). Use IS_ERR() to check and + * PTR_ERR() to obtain the numeric return value. + */ +struct v4l2_flash *v4l2_flash_indicator_init( + struct device *dev, struct fwnode_handle *fwn, + struct led_classdev *iled_cdev, struct v4l2_flash_config *config); /** * v4l2_flash_release - release V4L2 Flash sub-device @@ -140,9 +151,14 @@ void v4l2_flash_release(struct v4l2_flash *v4l2_flash); static inline struct v4l2_flash *v4l2_flash_init( struct device *dev, struct fwnode_handle *fwn, struct led_classdev_flash *fled_cdev, - struct led_classdev *iled_cdev, - const struct v4l2_flash_ops *ops, - struct v4l2_flash_config *config) + const struct v4l2_flash_ops *ops, struct v4l2_flash_config *config) +{ + return NULL; +} + +static inline struct v4l2_flash *v4l2_flash_indicator_init( + struct device *dev, struct fwnode_handle *fwn, + struct led_classdev *iled_cdev, struct v4l2_flash_config *config) { return NULL; } -- cgit From 40cefffd35f62618210d9d598f2c362095693ffd Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 19 Jul 2017 18:38:36 -0400 Subject: media: v4l2-flash-led-class: Document v4l2_flash_init() references The v4l2_flash_init() keeps a reference to the ops struct but not to the config struct (nor anything it contains). Document this. Signed-off-by: Sakari Ailus Acked-by: Pavel Machek Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-flash-led-class.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-flash-led-class.h b/include/media/v4l2-flash-led-class.h index 1b515166ad60..5c1d50f78e12 100644 --- a/include/media/v4l2-flash-led-class.h +++ b/include/media/v4l2-flash-led-class.h @@ -112,6 +112,9 @@ static inline struct v4l2_flash *v4l2_ctrl_to_v4l2_flash(struct v4l2_ctrl *c) * @config: initialization data for V4L2 Flash sub-device * * Create V4L2 Flash sub-device wrapping given LED subsystem device. + * The ops pointer is stored by the V4L2 flash framework. No + * references are held to config nor its contents once this function + * has returned. * * Returns: A valid pointer, or, when an error occurs, the return * value is encoded using ERR_PTR(). Use IS_ERR() to check and @@ -130,6 +133,9 @@ struct v4l2_flash *v4l2_flash_init( * @config: initialization data for V4L2 Flash sub-device * * Create V4L2 Flash sub-device wrapping given LED subsystem device. + * The ops pointer is stored by the V4L2 flash framework. No + * references are held to config nor its contents once this function + * has returned. * * Returns: A valid pointer, or, when an error occurs, the return * value is encoded using ERR_PTR(). Use IS_ERR() to check and -- cgit From a1cc5a57e49e871ce5e610597af1bd6bea06cb5c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 13 Aug 2017 16:21:17 +0200 Subject: blackfin: merge the two TWI header files There seems to be no need for separate ones since all users include both files anyhow. Merge them because include/linux/i2c is to be deprecated. Signed-off-by: Wolfram Sang --- include/linux/i2c/bfin_twi.h | 145 ------------------------------------------- 1 file changed, 145 deletions(-) delete mode 100644 include/linux/i2c/bfin_twi.h (limited to 'include') diff --git a/include/linux/i2c/bfin_twi.h b/include/linux/i2c/bfin_twi.h deleted file mode 100644 index 135a4e0876ae..000000000000 --- a/include/linux/i2c/bfin_twi.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * i2c-bfin-twi.h - interface to ADI TWI controller - * - * Copyright 2005-2014 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. - */ - -#ifndef __I2C_BFIN_TWI_H__ -#define __I2C_BFIN_TWI_H__ - -#include -#include - -/* - * ADI twi registers layout - */ -struct bfin_twi_regs { - u16 clkdiv; - u16 dummy1; - u16 control; - u16 dummy2; - u16 slave_ctl; - u16 dummy3; - u16 slave_stat; - u16 dummy4; - u16 slave_addr; - u16 dummy5; - u16 master_ctl; - u16 dummy6; - u16 master_stat; - u16 dummy7; - u16 master_addr; - u16 dummy8; - u16 int_stat; - u16 dummy9; - u16 int_mask; - u16 dummy10; - u16 fifo_ctl; - u16 dummy11; - u16 fifo_stat; - u16 dummy12; - u32 __pad[20]; - u16 xmt_data8; - u16 dummy13; - u16 xmt_data16; - u16 dummy14; - u16 rcv_data8; - u16 dummy15; - u16 rcv_data16; - u16 dummy16; -}; - -struct bfin_twi_iface { - int irq; - spinlock_t lock; - char read_write; - u8 command; - u8 *transPtr; - int readNum; - int writeNum; - int cur_mode; - int manual_stop; - int result; - struct i2c_adapter adap; - struct completion complete; - struct i2c_msg *pmsg; - int msg_num; - int cur_msg; - u16 saved_clkdiv; - u16 saved_control; - struct bfin_twi_regs __iomem *regs_base; -}; - -/* ******************** TWO-WIRE INTERFACE (TWI) MASKS ********************/ -/* TWI_CLKDIV Macros (Use: *pTWI_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ -#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ -#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ - -/* TWI_PRESCALE Masks */ -#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ -#define TWI_ENA 0x0080 /* TWI Enable */ -#define SCCB 0x0200 /* SCCB Compatibility Enable */ - -/* TWI_SLAVE_CTL Masks */ -#define SEN 0x0001 /* Slave Enable */ -#define SADD_LEN 0x0002 /* Slave Address Length */ -#define STDVAL 0x0004 /* Slave Transmit Data Valid */ -#define NAK 0x0008 /* NAK Generated At Conclusion Of Transfer */ -#define GEN 0x0010 /* General Call Address Matching Enabled */ - -/* TWI_SLAVE_STAT Masks */ -#define SDIR 0x0001 /* Slave Transfer Direction (RX/TX*) */ -#define GCALL 0x0002 /* General Call Indicator */ - -/* TWI_MASTER_CTL Masks */ -#define MEN 0x0001 /* Master Mode Enable */ -#define MADD_LEN 0x0002 /* Master Address Length */ -#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ -#define FAST 0x0008 /* Use Fast Mode Timing Specs */ -#define STOP 0x0010 /* Issue Stop Condition */ -#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ -#define DCNT 0x3FC0 /* Data Bytes To Transfer */ -#define SDAOVR 0x4000 /* Serial Data Override */ -#define SCLOVR 0x8000 /* Serial Clock Override */ - -/* TWI_MASTER_STAT Masks */ -#define MPROG 0x0001 /* Master Transfer In Progress */ -#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ -#define ANAK 0x0004 /* Address Not Acknowledged */ -#define DNAK 0x0008 /* Data Not Acknowledged */ -#define BUFRDERR 0x0010 /* Buffer Read Error */ -#define BUFWRERR 0x0020 /* Buffer Write Error */ -#define SDASEN 0x0040 /* Serial Data Sense */ -#define SCLSEN 0x0080 /* Serial Clock Sense */ -#define BUSBUSY 0x0100 /* Bus Busy Indicator */ - -/* TWI_INT_SRC and TWI_INT_ENABLE Masks */ -#define SINIT 0x0001 /* Slave Transfer Initiated */ -#define SCOMP 0x0002 /* Slave Transfer Complete */ -#define SERR 0x0004 /* Slave Transfer Error */ -#define SOVF 0x0008 /* Slave Overflow */ -#define MCOMP 0x0010 /* Master Transfer Complete */ -#define MERR 0x0020 /* Master Transfer Error */ -#define XMTSERV 0x0040 /* Transmit FIFO Service */ -#define RCVSERV 0x0080 /* Receive FIFO Service */ - -/* TWI_FIFO_CTRL Masks */ -#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ -#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ -#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ -#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ - -/* TWI_FIFO_STAT Masks */ -#define XMTSTAT 0x0003 /* Transmit FIFO Status */ -#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ -#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ -#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ - -#define RCVSTAT 0x000C /* Receive FIFO Status */ -#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ -#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ -#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ - -#endif -- cgit From 83b2a3c2ab24561cb6de45e6b155e3a7c4c1c91b Mon Sep 17 00:00:00 2001 From: Pierre-Hugues Husson Date: Sun, 27 Aug 2017 15:58:31 +0200 Subject: regulator: rn5t618: add RC5T619 PMIC support Extend the driver to support Ricoh RC5T619. Support the additional regulators and slightly different voltage ranges. Signed-off-by: Pierre-Hugues Husson Signed-off-by: Mark Brown --- include/linux/mfd/rn5t618.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h index e5a6cdeb77db..d61bc58aba8a 100644 --- a/include/linux/mfd/rn5t618.h +++ b/include/linux/mfd/rn5t618.h @@ -226,11 +226,17 @@ enum { RN5T618_DCDC2, RN5T618_DCDC3, RN5T618_DCDC4, + RN5T618_DCDC5, RN5T618_LDO1, RN5T618_LDO2, RN5T618_LDO3, RN5T618_LDO4, RN5T618_LDO5, + RN5T618_LDO6, + RN5T618_LDO7, + RN5T618_LDO8, + RN5T618_LDO9, + RN5T618_LDO10, RN5T618_LDORTC1, RN5T618_LDORTC2, RN5T618_REG_NUM, -- cgit From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: Clarify (and fix) MAX_LFS_FILESIZE macros We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do wan tto avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant. Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..cbfe127bccf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 -- cgit From a9e4998073d49a762a154a6b48a332ec6cb8e6b1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Jul 2017 18:12:07 -0400 Subject: media: dvb_frontend: ensure that inital front end status initialized The fe_status variable s is not initialized meaning it can have any random garbage status. This could be problematic if fe->ops.tune is false as s is not updated by the call to fe->ops.tune() and a subsequent check on the change status will using a garbage value. Fix this by adding FE_NONE to the enum fe_status and initializing s to this. Detected by CoverityScan, CID#112887 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Reviewed-by: Shuah Khan Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 00a20cd21ee2..afc3972b0879 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -127,6 +127,7 @@ enum fe_sec_mini_cmd { * to reset DiSEqC, tone and parameters */ enum fe_status { + FE_NONE = 0x00, FE_HAS_SIGNAL = 0x01, FE_HAS_CARRIER = 0x02, FE_HAS_VITERBI = 0x04, -- cgit From 8774370d268f2f43d8487d230e0d4fa1647759b3 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Mon, 17 Jul 2017 22:09:45 -0700 Subject: i40e/i40evf: support for VF VLAN tag stripping control This patch gives VF capability to control VLAN tag stripping via ethtool. As rx-vlan-offload was fixed before, now the VF is able to change it using "ethtool --offload rxvlan on/off" settings. Signed-off-by: Mariusz Stachura Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index becfca2ae94e..2b038442c352 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -133,6 +133,8 @@ enum virtchnl_ops { VIRTCHNL_OP_CONFIG_RSS_LUT = 24, VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, VIRTCHNL_OP_SET_RSS_HENA = 26, + VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, + VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, }; /* This macro is used to generate a compilation error if a structure @@ -686,6 +688,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_SET_RSS_HENA: valid_len = sizeof(struct virtchnl_rss_hena); break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit From f75b0afa190d041c142bdc3f0c6022f61340b8a3 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 24 Aug 2017 10:42:50 +0900 Subject: PM / devfreq: Move private devfreq_update_stats() into devfreq THe devfreq_update_stats() updates the 'struct devfreq_dev_status' in order to get current status of devfreq device. It is only used for the governors. This patch moves the devfreq_update_stats() into devfreq directory. Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 6c220e4ebb6b..597294e0cc40 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -214,19 +214,6 @@ extern void devm_devfreq_unregister_notifier(struct device *dev, extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index); -/** - * devfreq_update_stats() - update the last_status pointer in struct devfreq - * @df: the devfreq instance whose status needs updating - * - * Governors are recommended to use this function along with last_status, - * which allows other entities to reuse the last_status without affecting - * the values fetched later by governors. - */ -static inline int devfreq_update_stats(struct devfreq *df) -{ - return df->profile->get_dev_status(df->dev.parent, &df->last_status); -} - #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq -- cgit From 3e00ab4ac51c2ed47c28fd5000c47399f1a11cf5 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Tue, 1 Aug 2017 19:41:42 +0530 Subject: dmaengine: add DMA_PREP_CMD for non-Data descriptors. Some of the DMA controllers are capable of issuing the commands to peripheral by the DMA. These commands can be list of register reads/writes and its different from normal data reads/writes. This patch adds new flag DMA_PREP_CMD in DMA API which tells the driver that the data passed to DMA API is command data and DMA controller driver will form descriptor in the required format. This flag can be used by any DMA controller driver which requires the descriptor in different format for non-Data descriptors. Signed-off-by: Abhishek Sahu Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 533680860865..dd4de1d40166 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -186,6 +186,9 @@ struct dma_interleaved_template { * on the result of this operation * @DMA_CTRL_REUSE: client can reuse the descriptor and submit again till * cleared or freed + * @DMA_PREP_CMD: tell the driver that the data passed to DMA API is command + * data and the descriptor should be in different format from normal + * data descriptors. */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -195,6 +198,7 @@ enum dma_ctrl_flags { DMA_PREP_CONTINUE = (1 << 4), DMA_PREP_FENCE = (1 << 5), DMA_CTRL_REUSE = (1 << 6), + DMA_PREP_CMD = (1 << 7), }; /** -- cgit From dfebb055f73a24e2c8756b837d9ce1a06457f5d7 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Tue, 1 Aug 2017 19:41:43 +0530 Subject: dmaengine: qcom: bam_dma: wrapper functions for command descriptor QCOM BAM also supports command descriptor which allows the SW to create descriptors of type command which does not generate any data transmissions but configures registers in the peripheral. In command descriptor the 32bit address point to the start of the command block which holds the command elements and the 16bit size define the size of the command block. Each Command Element is structured by 4 words: Write command: address + cmd register data register mask reserved Read command: address + cmd read data result address, reserved reserved This patch creates a new header file for BAM driver which contains the structures and wrapper functions for command descriptor. This file will be used by different QCOM peripheral drivers for forming the command descriptor Signed-off-by: Abhishek Sahu Signed-off-by: Vinod Koul --- include/linux/dma/qcom_bam_dma.h | 79 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 include/linux/dma/qcom_bam_dma.h (limited to 'include') diff --git a/include/linux/dma/qcom_bam_dma.h b/include/linux/dma/qcom_bam_dma.h new file mode 100644 index 000000000000..077d43a358e5 --- /dev/null +++ b/include/linux/dma/qcom_bam_dma.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _QCOM_BAM_DMA_H +#define _QCOM_BAM_DMA_H + +#include + +/* + * This data type corresponds to the native Command Element + * supported by BAM DMA Engine. + * + * @cmd_and_addr - upper 8 bits command and lower 24 bits register address. + * @data - for write command: content to be written into peripheral register. + * for read command: dest addr to write peripheral register value. + * @mask - register mask. + * @reserved - for future usage. + * + */ +struct bam_cmd_element { + __le32 cmd_and_addr; + __le32 data; + __le32 mask; + __le32 reserved; +}; + +/* + * This enum indicates the command type in a command element + */ +enum bam_command_type { + BAM_WRITE_COMMAND = 0, + BAM_READ_COMMAND, +}; + +/* + * prep_bam_ce_le32 - Wrapper function to prepare a single BAM command + * element with the data already in le32 format. + * + * @bam_ce: bam command element + * @addr: target address + * @cmd: BAM command + * @data: actual data for write and dest addr for read in le32 + */ +static inline void +bam_prep_ce_le32(struct bam_cmd_element *bam_ce, u32 addr, + enum bam_command_type cmd, __le32 data) +{ + bam_ce->cmd_and_addr = + cpu_to_le32((addr & 0xffffff) | ((cmd & 0xff) << 24)); + bam_ce->data = data; + bam_ce->mask = cpu_to_le32(0xffffffff); +} + +/* + * bam_prep_ce - Wrapper function to prepare a single BAM command element + * with the data. + * + * @bam_ce: BAM command element + * @addr: target address + * @cmd: BAM command + * @data: actual data for write and dest addr for read + */ +static inline void +bam_prep_ce(struct bam_cmd_element *bam_ce, u32 addr, + enum bam_command_type cmd, u32 data) +{ + bam_prep_ce_le32(bam_ce, addr, cmd, cpu_to_le32(data)); +} +#endif -- cgit From 604407890ecf624c2fb41013c82b22aade59b455 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:32:11 -0700 Subject: dm: fix printk() rate limiting code Using the same rate limiting state for different kinds of messages is wrong because this can cause a high frequency message to suppress a report of a low frequency message. Hence use a unique rate limiting state per message type. Fixes: 71a16736a15e ("dm: use local printk ratelimit") Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 1473455d0341..4f2b3b2076c4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -549,46 +549,29 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); *---------------------------------------------------------------*/ #define DM_NAME "device-mapper" -#ifdef CONFIG_PRINTK -extern struct ratelimit_state dm_ratelimit_state; - -#define dm_ratelimit() __ratelimit(&dm_ratelimit_state) -#else -#define dm_ratelimit() 0 -#endif +#define DM_RATELIMIT(pr_func, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&rs)) \ + pr_func(DM_FMT(fmt), ##__VA_ARGS__); \ +} while (0) #define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" #define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__) #define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__) -#define DMERR_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMERR(fmt, ##__VA_ARGS__); \ -} while (0) - +#define DMERR_LIMIT(fmt, ...) DM_RATELIMIT(pr_err, fmt, ##__VA_ARGS__) #define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__) -#define DMWARN_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMWARN(fmt, ##__VA_ARGS__); \ -} while (0) - +#define DMWARN_LIMIT(fmt, ...) DM_RATELIMIT(pr_warn, fmt, ##__VA_ARGS__) #define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__) -#define DMINFO_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMINFO(fmt, ##__VA_ARGS__); \ -} while (0) +#define DMINFO_LIMIT(fmt, ...) DM_RATELIMIT(pr_info, fmt, ##__VA_ARGS__) #ifdef CONFIG_DM_DEBUG #define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__) -#define DMDEBUG_LIMIT(fmt, ...) \ -do { \ - if (dm_ratelimit()) \ - DMDEBUG(fmt, ##__VA_ARGS__); \ -} while (0) +#define DMDEBUG_LIMIT(fmt, ...) DM_RATELIMIT(pr_debug, fmt, ##__VA_ARGS__) #else #define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__) -- cgit From b97971bee55dc45420e0fe352d0b4df6e74716d4 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Wed, 2 Aug 2017 10:22:01 -0600 Subject: coresight: pmu: Adds return stack option to perf coresight pmu Return stack is a programmable option on some ETM and PTM hardware. Adds the option flags to enable this from the perf event command line. Signed-off-by: Mike Leach Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight-pmu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 7d410260661b..45852c2cd096 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -24,6 +24,7 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 #define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 static inline int coresight_get_trace_id(int cpu) { -- cgit From a09ac3974d0c0b25dfa70f8076546bc4876dd7a8 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Thu, 20 Jul 2017 08:22:36 +0100 Subject: tty: resolve tty contention between kernel and user space The commit 12e84c71b7d4 ("tty: export tty_open_by_driver") exports tty_open_by_device to allow tty to be opened from inside kernel which works fine except that it doesn't handle contention with user space or another kernel-space open of the same tty. For example, opening a tty from user space while it is kernel opened results in failure and a kernel log message about mismatch between tty->count and tty's file open count. This patch makes kernel access to tty exclusive, so that if a user process or kernel opens a kernel opened tty, it gets -EBUSY. It does this by adding TTY_KOPENED flag to tty->flags. When this flag is set, tty_open_by_driver returns -EBUSY. Instead of overloading tty_open_by_driver for both kernel and user space, this patch creates a separate function tty_kopen which closely follows tty_open_by_driver. tty_kclose closes the tty opened by tty_kopen. To address the mismatch between tty->count and #fd's, this patch adds #kopen's to the count before comparing it with tty->count. That way check_tty_count reflects correct usage count. Returning -EBUSY on tty open is a change in the interface. I have tested this with minicom, picocom and commands like "echo foo > /dev/ttyS0". They all correctly report "Device or resource busy" when the tty is already kernel opened. Signed-off-by: Okash Khawaja Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 79c30daf46a9..236d52b8be8c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -261,6 +261,8 @@ struct tty_port { */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ +#define TTY_PORT_KOPENED 5 /* device exclusively opened by + kernel */ /* * Where all of the state associated with a tty is kept while the tty @@ -401,6 +403,8 @@ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, struct file *filp); +extern struct tty_struct *tty_kopen(dev_t device); +extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); #else static inline void tty_kref_put(struct tty_struct *tty) @@ -425,6 +429,10 @@ static inline const char *tty_name(const struct tty_struct *tty) static inline struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, struct file *filp) { return NULL; } +static inline struct tty_struct *tty_kopen(dev_t device) +{ return ERR_PTR(-ENODEV); } +static inline void tty_kclose(struct tty_struct *tty) +{ } static inline int tty_dev_name_to_number(const char *name, dev_t *number) { return -ENOTSUPP; } #endif @@ -652,6 +660,19 @@ static inline void tty_port_set_initialized(struct tty_port *port, bool val) clear_bit(TTY_PORT_INITIALIZED, &port->iflags); } +static inline bool tty_port_kopened(struct tty_port *port) +{ + return test_bit(TTY_PORT_KOPENED, &port->iflags); +} + +static inline void tty_port_set_kopened(struct tty_port *port, bool val) +{ + if (val) + set_bit(TTY_PORT_KOPENED, &port->iflags); + else + clear_bit(TTY_PORT_KOPENED, &port->iflags); +} + extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); -- cgit From a033c3b10a2bec387ccd9e39fadd94eef6519626 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Thu, 20 Jul 2017 08:22:38 +0100 Subject: tty: undo export of tty_open_by_driver Since we have tty_kopen, we no longer need to export tty_open_by_driver. This patch makes this function static. Signed-off-by: Okash Khawaja Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 236d52b8be8c..cf53eb539f6e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -401,8 +401,6 @@ extern struct tty_struct *get_current_tty(void); /* tty_io.c */ extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); -extern struct tty_struct *tty_open_by_driver(dev_t device, struct inode *inode, - struct file *filp); extern struct tty_struct *tty_kopen(dev_t device); extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); @@ -426,9 +424,6 @@ static inline int __init tty_init(void) { return 0; } static inline const char *tty_name(const struct tty_struct *tty) { return "(none)"; } -static inline struct tty_struct *tty_open_by_driver(dev_t device, - struct inode *inode, struct file *filp) -{ return NULL; } static inline struct tty_struct *tty_kopen(dev_t device) { return ERR_PTR(-ENODEV); } static inline void tty_kclose(struct tty_struct *tty) -- cgit From 00ee84159020728cbdbfc569b42c036ef20c6e55 Mon Sep 17 00:00:00 2001 From: sayli karnik Date: Fri, 16 Jun 2017 15:39:38 +0530 Subject: mod_devicetable: Remove excess description from structured comment Remove excess member description to fix following warnings in sphinx build: Excess struct/union/enum/typedef member 'ver_major' description in 'fsl_mc_device_id' Excess struct/union/enum/typedef member 'ver_minor' description in 'fsl_mc_device_id' Signed-off-by: sayli karnik CC: Stuart Yoder Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3f74ef2281e8..694cebb50f72 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -674,8 +674,6 @@ struct ulpi_device_id { * struct fsl_mc_device_id - MC object device identifier * @vendor: vendor ID * @obj_type: MC object type - * @ver_major: MC object version major number - * @ver_minor: MC object version minor number * * Type of entries in the "device Id" table for MC object devices supported by * a MC object device driver. The last entry of the table has vendor set to 0x0 -- cgit From 9f757f415210a7c85e2784e4a1733ea78b2e4e88 Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Tue, 18 Jul 2017 08:32:53 +0200 Subject: drivers/fmc: hide fmc operations behind helpers This gave us more freedom to change/add/remove operations without recompiling all device driver. Typically, Carrier board implement the fmc operations, so they will not use these helpers. Signed-off-by: Federico Vaga Tested-by: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Greg Kroah-Hartman --- include/linux/fmc.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/fmc.h b/include/linux/fmc.h index a5f0aa5c2a8d..8bb4a154bbdc 100644 --- a/include/linux/fmc.h +++ b/include/linux/fmc.h @@ -234,4 +234,22 @@ extern void fmc_free_id_info(struct fmc_device *fmc); extern void fmc_dump_eeprom(const struct fmc_device *fmc); extern void fmc_dump_sdb(const struct fmc_device *fmc); +/* helpers for FMC operations */ +extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, + char *name, int flags); +extern void fmc_irq_free(struct fmc_device *fmc); +extern void fmc_irq_ack(struct fmc_device *fmc); +extern int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv); +extern int fmc_gpio_config(struct fmc_device *fmc, struct fmc_gpio *gpio, + int ngpio); +extern int fmc_read_ee(struct fmc_device *fmc, int pos, void *d, int l); +extern int fmc_write_ee(struct fmc_device *fmc, int pos, const void *d, int l); + +/* helpers for FMC operations */ +extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, + char *name, int flags); +extern void fmc_irq_free(struct fmc_device *fmc); +extern void fmc_irq_ack(struct fmc_device *fmc); +extern int fmc_validate(struct fmc_device *fmc, struct fmc_driver *drv); + #endif /* __LINUX_FMC_H__ */ -- cgit From 2071a3e94abd34d65bd40f1ff845f9cea300dfa6 Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Tue, 18 Jul 2017 08:33:03 +0200 Subject: drivers/fmc: The only way to dump the SDB is from debugfs Driver should not call fmc_sdb_dump() anymore. (actually they can but the operation is not supported, so it will print an error message) Signed-off-by: Federico Vaga Tested-by: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Greg Kroah-Hartman --- include/linux/fmc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fmc.h b/include/linux/fmc.h index 8bb4a154bbdc..5c8df0c49fc3 100644 --- a/include/linux/fmc.h +++ b/include/linux/fmc.h @@ -180,6 +180,9 @@ struct fmc_device { uint32_t device_id; /* Filled by the device */ char *mezzanine_name; /* Defaults to ``fmc'' */ void *mezzanine_data; + + struct dentry *dbg_dir; + struct dentry *dbg_sdb_dump; }; #define to_fmc_device(x) container_of((x), struct fmc_device, dev) @@ -232,7 +235,6 @@ extern int fmc_match(struct device *dev, struct device_driver *drv); extern int fmc_fill_id_info(struct fmc_device *fmc); extern void fmc_free_id_info(struct fmc_device *fmc); extern void fmc_dump_eeprom(const struct fmc_device *fmc); -extern void fmc_dump_sdb(const struct fmc_device *fmc); /* helpers for FMC operations */ extern int fmc_irq_request(struct fmc_device *fmc, irq_handler_t h, -- cgit From 15b1b0f0d87e34a87172d9b83bd260543a991c2e Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Tue, 18 Jul 2017 08:33:13 +0200 Subject: drivers/fmc: change registration prototype Permit use of either fmc_device_register_n or fmc_device_register_n_gw depending on the type of device in use. Signed-off-by: Federico Vaga Tested-by: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Greg Kroah-Hartman --- include/linux/fmc.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fmc.h b/include/linux/fmc.h index 5c8df0c49fc3..b6c73d54ca46 100644 --- a/include/linux/fmc.h +++ b/include/linux/fmc.h @@ -220,14 +220,23 @@ static inline void fmc_set_drvdata(struct fmc_device *fmc, void *data) dev_set_drvdata(&fmc->dev, data); } -/* The 4 access points */ +struct fmc_gateware { + void *bitstream; + unsigned long len; +}; + +/* The 5 access points */ extern int fmc_driver_register(struct fmc_driver *drv); extern void fmc_driver_unregister(struct fmc_driver *drv); extern int fmc_device_register(struct fmc_device *tdev); +extern int fmc_device_register_gw(struct fmc_device *tdev, + struct fmc_gateware *gw); extern void fmc_device_unregister(struct fmc_device *tdev); -/* Two more for device sets, all driven by the same FPGA */ +/* Three more for device sets, all driven by the same FPGA */ extern int fmc_device_register_n(struct fmc_device **devs, int n); +extern int fmc_device_register_n_gw(struct fmc_device **devs, int n, + struct fmc_gateware *gw); extern void fmc_device_unregister_n(struct fmc_device **devs, int n); /* Internal cross-calls between files; not exported to other modules */ -- cgit From 9c0dda14951e9ed30b6f04e14bf64a3c69686c5b Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Tue, 18 Jul 2017 08:33:24 +0200 Subject: drivers/fmc: carrier can program FPGA on registration The initial FPGA may require programming before it is useful. Signed-off-by: Federico Vaga Tested-by: Pat Riehecky Acked-by: Alessandro Rubini Signed-off-by: Greg Kroah-Hartman --- include/linux/fmc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fmc.h b/include/linux/fmc.h index b6c73d54ca46..3dc8a1b2db7b 100644 --- a/include/linux/fmc.h +++ b/include/linux/fmc.h @@ -132,6 +132,8 @@ struct fmc_operations { uint32_t (*read32)(struct fmc_device *fmc, int offset); void (*write32)(struct fmc_device *fmc, uint32_t value, int offset); int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv); + int (*reprogram_raw)(struct fmc_device *f, struct fmc_driver *d, + void *gw, unsigned long len); int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw); int (*irq_request)(struct fmc_device *fmc, irq_handler_t h, char *name, int flags); @@ -144,6 +146,8 @@ struct fmc_operations { }; /* Prefer this helper rather than calling of fmc->reprogram directly */ +int fmc_reprogram_raw(struct fmc_device *fmc, struct fmc_driver *d, + void *gw, unsigned long len, int sdb_entry); extern int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw, int sdb_entry); -- cgit From 6a7a81761bee2a6694c0a5fb1a16b263c96241c1 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 24 Aug 2017 16:09:10 -0600 Subject: driver core: Document struct device:dma_ops Commit 5657933dbb6e (treewide: Move dma_ops from struct dev_archdata into struct device) added the dma_ops field to struct device, but did not update the kerneldoc comment, yielding this warning: ./include/linux/device.h:969: warning: No description found for parameter 'dma_ops' Add a description and bring a little peace to the world. Signed-off-by: Jonathan Corbet Reviewed-by: Bart Van Assche Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 5db26b9c0fcc..d5bcc5f109eb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -847,6 +847,7 @@ struct dev_links_info { * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. + * @dma_ops: DMA mapping operations for this device. * @dma_mask: Dma mask (if dma'ble device). * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all * hardware supports 64-bit addresses for consistent allocations -- cgit From ddc088238cd6988bb4ac3776f403d7ff9d3c7a63 Mon Sep 17 00:00:00 2001 From: Pawel Baldysiak Date: Wed, 16 Aug 2017 17:13:45 +0200 Subject: md: Runtime support for multiple ppls Increase PPL area to 1MB and use it as circular buffer to store PPL. The entry with highest generation number is the latest one. If PPL to be written is larger then space left in a buffer, rewind the buffer to the start (don't wrap it). Signed-off-by: Pawel Baldysiak Signed-off-by: Artur Paszkiewicz Signed-off-by: Shaohua Li --- include/uapi/linux/raid/md_p.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index d500bd224979..b9197976b660 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -324,9 +324,10 @@ struct mdp_superblock_1 { #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening * is guided by bitmap. */ -#define MD_FEATURE_CLUSTERED 256 /* clustered MD */ +#define MD_FEATURE_CLUSTERED 256 /* clustered MD */ #define MD_FEATURE_JOURNAL 512 /* support write cache */ #define MD_FEATURE_PPL 1024 /* support PPL */ +#define MD_FEATURE_MULTIPLE_PPLS 2048 /* support for multiple PPLs */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ @@ -338,6 +339,7 @@ struct mdp_superblock_1 { |MD_FEATURE_CLUSTERED \ |MD_FEATURE_JOURNAL \ |MD_FEATURE_PPL \ + |MD_FEATURE_MULTIPLE_PPLS \ ) struct r5l_payload_header { -- cgit From e0552573575ef43daaee5b1a139952d6127f088c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 19 Jul 2017 23:35:29 -0300 Subject: misc: eeprom_93xx46: Include eeprom_93xx46_platform_data struct has a 'struct gpio_desc' type member, so it is better to include , which provides 'struct gpio_desc' type. Signed-off-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- include/linux/eeprom_93xx46.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h index 885f587a3555..915898759280 100644 --- a/include/linux/eeprom_93xx46.h +++ b/include/linux/eeprom_93xx46.h @@ -2,8 +2,7 @@ * Module: eeprom_93xx46 * platform description for 93xx46 EEPROMs. */ - -struct gpio_desc; +#include struct eeprom_93xx46_platform_data { unsigned char flags; -- cgit From d3cc8ca3ab6c1d78c26102870313fa36c48ea247 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Tue, 22 Aug 2017 08:27:07 +0200 Subject: mux: include compiler.h from mux/consumer.h Required for __must_check. Signed-off-by: Ulrich Hecht Signed-off-by: Peter Rosin Signed-off-by: Greg Kroah-Hartman --- include/linux/mux/consumer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h index 5577e1b773c4..ea96d4c82be7 100644 --- a/include/linux/mux/consumer.h +++ b/include/linux/mux/consumer.h @@ -13,6 +13,8 @@ #ifndef _LINUX_MUX_CONSUMER_H #define _LINUX_MUX_CONSUMER_H +#include + struct device; struct mux_control; -- cgit From 242b476f821b055ed0fb70a8eb6defa85baada9c Mon Sep 17 00:00:00 2001 From: Johannes Poehlmann Date: Tue, 25 Jul 2017 13:27:11 +0200 Subject: w1: ds1wm: fix register offset (bus shift) calculation Replace incorrect register offsett calculation by direct configuration of bus_shift in mfd-cell. Indirect definition of address-shift by resource size was unobvious and was wrong (should have used a binary log). Signed-off-by: Johannes Poehlmann Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/ds1wm.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h index 38a372a0e285..79a01e8dc83f 100644 --- a/include/linux/mfd/ds1wm.h +++ b/include/linux/mfd/ds1wm.h @@ -1,13 +1,23 @@ -/* MFD cell driver data for the DS1WM driver */ +/* MFD cell driver data for the DS1WM driver + * + * to be defined in the MFD device that is + * using this driver for one of his sub devices + */ struct ds1wm_driver_data { int active_high; int clock_rate; - /* in milliseconds, the amount of time to */ - /* sleep following a reset pulse. Zero */ - /* should work if your bus devices recover*/ - /* time respects the 1-wire spec since the*/ - /* ds1wm implements the precise timings of*/ - /* a reset pulse/presence detect sequence.*/ + /* in milliseconds, the amount of time to + * sleep following a reset pulse. Zero + * should work if your bus devices recover + * time respects the 1-wire spec since the + * ds1wm implements the precise timings of + * a reset pulse/presence detect sequence. + */ unsigned int reset_recover_delay; + + /* left shift of register number to get register address offsett. + * Only 0,1,2 allowed for 8,16 or 32 bit bus width respectively + */ + unsigned int bus_shift; }; -- cgit From baa8055de0293a91f87bef5f32296d9ddcba9c56 Mon Sep 17 00:00:00 2001 From: Johannes Poehlmann Date: Tue, 25 Jul 2017 13:27:12 +0200 Subject: w1: ds1wm: make endian clean and use standard io memory accessors o Make endian clean, make HW-endianness configurable. o Use ioread*, iowrite* instead of __raw_readb,__raw_writeb to also use memory-barriers when accessing HW-registers. We do not want reordering to happen here. Both changes are tightly coupled, so I do them in one patch Signed-off-by: Johannes Poehlmann Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- include/linux/mfd/ds1wm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h index 79a01e8dc83f..2227c6a75d84 100644 --- a/include/linux/mfd/ds1wm.h +++ b/include/linux/mfd/ds1wm.h @@ -16,6 +16,11 @@ struct ds1wm_driver_data { */ unsigned int reset_recover_delay; + /* Say 1 here for big endian Hardware + * (only relevant with bus-shift > 0 + */ + bool is_hw_big_endian; + /* left shift of register number to get register address offsett. * Only 0,1,2 allowed for 8,16 or 32 bit bus width respectively */ -- cgit From 7a14724f54bf9889fcb1a9f1d4aa4e1d2e969d93 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 28 Aug 2017 08:33:20 -0700 Subject: libnvdimm: clean up command definitions Remove the command payloads that do not have an associated libnvdimm ioctl. I.e. remove the payloads that would only ever be carried in the ND_CMD_CALL envelope. This prevents userspace from growing unnecessary dependencies on this kernel header when userspace already has everything it needs to craft and send these commands. Cc: Jerry Hoemann Reported-by: Yasunori Goto Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 6d3c54264d8e..3f03567631cb 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -145,43 +145,6 @@ struct nd_cmd_clear_error { __u64 cleared; } __packed; -struct nd_cmd_trans_spa { - __u64 spa; - __u32 status; - __u8 flags; - __u8 _reserved[3]; - __u64 trans_length; - __u32 num_nvdimms; - struct nd_nvdimm_device { - __u32 nfit_device_handle; - __u32 _reserved; - __u64 dpa; - } __packed devices[0]; - -} __packed; - -struct nd_cmd_ars_err_inj { - __u64 err_inj_spa_range_base; - __u64 err_inj_spa_range_length; - __u8 err_inj_options; - __u32 status; -} __packed; - -struct nd_cmd_ars_err_inj_clr { - __u64 err_inj_clr_spa_range_base; - __u64 err_inj_clr_spa_range_length; - __u32 status; -} __packed; - -struct nd_cmd_ars_err_inj_stat { - __u32 status; - __u32 inj_err_rec_count; - struct nd_error_stat_query_record { - __u64 err_inj_stat_spa_range_base; - __u64 err_inj_stat_spa_range_length; - } __packed record[0]; -} __packed; - enum { ND_CMD_IMPLEMENTED = 0, -- cgit From d026d70a2e9460dc701d20b48672b80a2656006d Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Wed, 26 Jul 2017 11:34:46 +0200 Subject: nvmem: core: Add nvmem_cell_read_u32 This function does a quick and easy read of an u32 value without any kind of resource management code on the consumer side. Signed-off-by: Leonard Crestez Reviewed-by: Shawn Guo Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index c2256d746543..efafccffaee8 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -35,6 +35,7 @@ void nvmem_cell_put(struct nvmem_cell *cell); void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len); int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len); +int nvmem_cell_read_u32(struct device *dev, const char *cell_id, u32 *val); /* direct nvmem device read/write interface */ struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); @@ -85,6 +86,12 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, return -ENOSYS; } +static inline int nvmem_cell_read_u32(struct device *dev, + const char *cell_id, u32 *val) +{ + return -ENOSYS; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { -- cgit From 4da69f49e73e3c79f079de62ea940cabbbf15ae7 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Jul 2017 11:34:48 +0200 Subject: nvmem: include linux/err.h from header missing err.h header file can cause simillar errors like below in some configurations. "error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration]" This adds the missing include to ensure we can always include the header. Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index efafccffaee8..4e85447f7860 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -12,6 +12,9 @@ #ifndef _LINUX_NVMEM_CONSUMER_H #define _LINUX_NVMEM_CONSUMER_H +#include +#include + struct device; struct device_node; /* consumer cookie */ -- cgit From df770ff0586a494fabe68ffbe2898d7df5666663 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Wed, 2 Aug 2017 10:22:19 -0600 Subject: perf: cs-etm: Fix ETMv4 CONFIGR entry in perf.data file The value passed into the perf.data file for the CONFIGR register in ETMv4 was incorrectly being set to the command line options/ETMv3 value. Adds bit definitions and function to remap this value to the correct ETMv4 CONFIGR bit values for all selected options. Signed-off-by: Mike Leach Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight-pmu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 45852c2cd096..edfeaba95429 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -26,6 +26,11 @@ #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 +/* ETMv4 CONFIGR programming bits for the ETM OPTs */ +#define ETM4_CFG_BIT_CYCACC 4 +#define ETM4_CFG_BIT_TS 11 +#define ETM4_CFG_BIT_RETSTK 12 + static inline int coresight_get_trace_id(int cpu) { /* -- cgit From 960632ece6949be1ab6f7a911faa4fa6e8305f4a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 24 Aug 2017 00:08:32 +0200 Subject: netfilter: convert hook list to an array This converts the storage and layout of netfilter hook entries from a linked list to an array. After this commit, hook entries will be stored adjacent in memory. The next pointer is no longer required. The ops pointers are stored at the end of the array as they are only used in the register/unregister path and in the legacy br_netfilter code. nf_unregister_net_hooks() is slower than needed as it just calls nf_unregister_net_hook in a loop (i.e. at least n synchronize_net() calls), this will be addressed in followup patch. Test setup: - ixgbe 10gbit - netperf UDP_STREAM, 64 byte packets - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter): empty mangle and raw prerouting, mangle and filter input hooks: 353.9 this patch: 364.2 Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 45 ++++++++++++++++++++------------------- include/linux/netfilter_ingress.h | 4 ++-- include/net/netfilter/nf_queue.h | 2 +- include/net/netns/netfilter.h | 2 +- 5 files changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614642eb7eb7..ca0a30127300 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1811,7 +1811,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct nf_hook_entry __rcu *nf_hooks_ingress; + struct nf_hook_entries __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 22f081065d49..f84bca1703cd 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -72,25 +72,32 @@ struct nf_hook_ops { }; struct nf_hook_entry { - struct nf_hook_entry __rcu *next; nf_hookfn *hook; void *priv; - const struct nf_hook_ops *orig_ops; }; -static inline void -nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) -{ - entry->next = NULL; - entry->hook = ops->hook; - entry->priv = ops->priv; - entry->orig_ops = ops; -} +struct nf_hook_entries { + u16 num_hook_entries; + /* padding */ + struct nf_hook_entry hooks[]; + + /* trailer: pointers to original orig_ops of each hook. + * + * This is not part of struct nf_hook_entry since its only + * needed in slow path (hook register/unregister). + * + * const struct nf_hook_ops *orig_ops[] + */ +}; -static inline int -nf_hook_entry_priority(const struct nf_hook_entry *entry) +static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { - return entry->orig_ops->priority; + unsigned int n = e->num_hook_entries; + const void *hook_end; + + hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ + + return (struct nf_hook_ops **)hook_end; } static inline int @@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, return entry->hook(entry->priv, skb, state); } -static inline const struct nf_hook_ops * -nf_hook_entry_ops(const struct nf_hook_entry *entry) -{ - return entry->orig_ops; -} - static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, @@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry *entry); + const struct nf_hook_entries *e, unsigned int i); /** * nf_hook - call a netfilter hook @@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entry *hook_head; + struct nf_hook_entries *hook_head; int ret = 1; #ifdef HAVE_JUMP_LABEL @@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state, hook_head); + ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 59476061de86..8d5dae1e2ff8 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { - struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; int ret; @@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e, 0); if (ret == 0) return -1; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4454719ff849..39468720fc19 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -10,9 +10,9 @@ struct nf_queue_entry { struct list_head list; struct sk_buff *skb; unsigned int id; + unsigned int hook_index; /* index in hook_entries->hook[] */ struct nf_hook_state state; - struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cea396b53a60..72d66c8763d0 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,7 +16,7 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif -- cgit From 5916a22b83041b07d63191fe06206ae0fff6ec7a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Jun 2017 11:32:45 -0700 Subject: dm: constify argument arrays The arrays of 'struct dm_arg' are never modified by the device-mapper core, so constify them so that they are placed in .rodata. (Exception: the args array in dm-raid cannot be constified because it is allocated on the stack and modified.) Signed-off-by: Eric Biggers Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4f2b3b2076c4..3b5fdf308148 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -387,7 +387,7 @@ struct dm_arg { * Validate the next argument, either returning it as *value or, if invalid, * returning -EINVAL and setting *error. */ -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error); /* @@ -395,7 +395,7 @@ int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, * arg->min and arg->max further arguments. Either return the size as * *num_args or, if invalid, return -EINVAL and set *error. */ -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, +int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *num_args, char **error); /* -- cgit From 2cfa0bb25d25aa183ea29f1f9c2bc65f3f2c2264 Mon Sep 17 00:00:00 2001 From: Sinclair Yeh Date: Wed, 5 Jul 2017 01:37:55 -0700 Subject: drm/vmwgfx: Prepare to support fence fd Make the fields and flags available. Signed-off-by: Sinclair Yeh Reviewed-by: Deepak Singh Rawat Reviewed-by: Thomas Hellstrom --- include/uapi/drm/vmwgfx_drm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index d9dfde9aa757..0bc784f5e0db 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -297,13 +297,17 @@ union drm_vmw_surface_reference_arg { * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. - * @flags: Execbuf flags. None currently. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device * * Argument to the DRM_VMW_EXECBUF Ioctl. */ #define DRM_VMW_EXECBUF_VERSION 2 +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + struct drm_vmw_execbuf_arg { __u64 commands; __u32 command_size; @@ -312,7 +316,7 @@ struct drm_vmw_execbuf_arg { __u32 version; __u32 flags; __u32 context_handle; - __u32 pad64; + __s32 imported_fence_fd; }; /** @@ -328,6 +332,7 @@ struct drm_vmw_execbuf_arg { * @passed_seqno: The highest seqno number processed by the hardware * so far. This can be used to mark user-space fence objects as signaled, and * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported * @error: This member should've been set to -EFAULT on submission. * The following actions should be take on completion: * error == -EFAULT: Fence communication failed. The host is synchronized. @@ -345,7 +350,7 @@ struct drm_vmw_fence_rep { __u32 mask; __u32 seqno; __u32 passed_seqno; - __u32 pad64; + __s32 fd; __s32 error; }; -- cgit From 7521621e600aeefe5ffcc1f90ae26a42fc20c452 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 11 Aug 2017 15:44:43 +0200 Subject: Do not disable driver and bus shutdown hook when class shutdown hook is set. As seen from the implementation of the single class shutdown hook this is not very sound design. Rename the class shutdown hook to shutdown_pre to make it clear it runs before the driver shutdown hook. Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index d5bcc5f109eb..c6f27207dbe8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -375,7 +375,7 @@ int subsys_virtual_register(struct bus_type *subsys, * @suspend: Used to put the device to sleep mode, usually to a low power * state. * @resume: Used to bring the device from the sleep mode. - * @shutdown: Called at shut-down time to quiesce the device. + * @shutdown_pre: Called at shut-down time before driver shutdown. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. * @pm: The default device power management operations of this class. @@ -404,7 +404,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - int (*shutdown)(struct device *dev); + int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; const void *(*namespace)(struct device *dev); -- cgit From 35f0b6a779b8b7a98faefd7c1c660b4dac9a5c26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Aug 2017 08:28:08 +0200 Subject: libata: quirk read log on no-name M.2 SSD Ido reported that reading the log page on his systems fails, so quirk it as it won't support ZBC or security protocols. Signed-off-by: Christoph Hellwig Reported-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 931c32f1f18d..9e927ae7fced 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -438,6 +438,7 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_HORKAGE_NO_LOG_PAGE = (1 << 26), /* Doesn't like Get Log Page */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit From 464bc0fd6273d518aee79fbd37211dd9bc35d863 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Aug 2017 07:10:04 -0700 Subject: bpf: convert sockmap field attach_bpf_fd2 to type In the initial sockmap API we provided strparser and verdict programs using a single attach command by extending the attach API with a the attach_bpf_fd2 field. However, if we add other programs in the future we will be adding a field for every new possible type, attach_bpf_fd(3,4,..). This seems a bit clumsy for an API. So lets push the programs using two new type fields. BPF_SK_SKB_STREAM_PARSER BPF_SK_SKB_STREAM_VERDICT This has the advantage of having a readable name and can easily be extended in the future. Updates to samples and sockmap included here also generalize tests slightly to support upcoming patch for multiple map support. Signed-off-by: John Fastabend Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Suggested-by: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 ++++++++-- include/uapi/linux/bpf.h | 9 +++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 830f472d8df5..c2cb1b5c094e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,8 +39,6 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); - int (*map_attach)(struct bpf_map *map, - struct bpf_prog *p1, struct bpf_prog *p2); }; struct bpf_map { @@ -387,11 +385,19 @@ static inline void __dev_map_flush(struct bpf_map *map) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); +int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { return NULL; } + +static inline int sock_map_attach_prog(struct bpf_map *map, + struct bpf_prog *prog, + u32 type) +{ + return -EOPNOTSUPP; +} #endif /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 843818dff96d..97227be3690c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -136,7 +136,8 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, - BPF_CGROUP_SMAP_INGRESS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -224,7 +225,6 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; - __u32 attach_bpf_fd2; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -580,14 +580,11 @@ union bpf_attr { * @flags: reserved for future use * Return: SK_REDIRECT * - * int bpf_sock_map_update(skops, map, key, flags, map_flags) + * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem - * @map_flags: sock map specific flags - * bit 1: Enable strparser - * other bits: reserved */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit From 2f857d04601a1bb56958b95a9f180bce0e91e5e6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 28 Aug 2017 07:10:25 -0700 Subject: bpf: sockmap, remove STRPARSER map_flags and add multi-map support The addition of map_flags BPF_SOCKMAP_STRPARSER flags was to handle a specific use case where we want to have BPF parse program disabled on an entry in a sockmap. However, Alexei found the API a bit cumbersome and I agreed. Lets remove the STRPARSER flag and support the use case by allowing socks to be in multiple maps. This allows users to create two maps one with programs attached and one without. When socks are added to maps they now inherit any programs attached to the map. This is a nice generalization and IMO improves the API. The API rules are less ambiguous and do not need a flag: - When a sock is added to a sockmap we have two cases, i. The sock map does not have any attached programs so we can add sock to map without inheriting bpf programs. The sock may exist in 0 or more other maps. ii. The sock map has an attached BPF program. To avoid duplicate bpf programs we only add the sock entry if it does not have an existing strparser/verdict attached, returning -EBUSY if a program is already attached. Otherwise attach the program and inherit strparser/verdict programs from the sock map. This allows for socks to be in a multiple maps for redirects and inherit a BPF program from a single map. Also this patch simplifies the logic around BPF_{EXIST|NOEXIST|ANY} flags. In the original patch I tried to be extra clever and only update map entries when necessary. Now I've decided the complexity is not worth it. If users constantly update an entry with the same sock for no reason (i.e. update an entry without actually changing any parameters on map or sock) we still do an alloc/release. Using this and allowing multiple entries of a sock to exist in a map the logic becomes much simpler. Note: Now that multiple maps are supported the "maps" pointer called when a socket is closed becomes a list of maps to remove the sock from. To keep the map up to date when a sock is added to the sockmap we must add the map/elem in the list. Likewise when it is removed we must remove it from the list. This results in searching the per psock list on delete operation. On TCP_CLOSE events we walk the list and remove the psock from all map/entry locations. I don't see any perf implications in this because at most I have a psock in two maps. If a psock were to be in many maps its possibly this might be noticeable on delete but I can't think of a reason to dup a psock in many maps. The sk_callback_lock is used to protect read/writes to the list. This was convenient because in all locations we were taking the lock anyways just after working on the list. Also the lock is per sock so in normal cases we shouldn't see any contention. Suggested-by: Alexei Starovoitov Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 97227be3690c..08c206a863e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,9 +143,6 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_SOCKMAP_STRPARSER is used sockmap will use strparser on receive */ -#define BPF_SOCKMAP_STRPARSER (1U << 0) - /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup -- cgit From aef3ad103a686f21b746977d4ed21cc1af36f589 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 13 Aug 2017 17:47:42 +0300 Subject: serial: core: remove unneeded irq_wake flag There is no need to duplicate a flag which IRQ core takes care of. Replace custom flag by IRQ core API that retrieves its state. Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8dc24c855a70..5553e04e59c9 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -247,7 +247,6 @@ struct uart_port { struct device *dev; /* parent device */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; - unsigned char irq_wake; unsigned char unused[2]; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ -- cgit From 63e8d4394a2d226803f47abd7287dbb6d21bf8e4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Aug 2017 16:58:20 +0300 Subject: serial: pch_uart: Make port type explicit It used to be a gap in port definitions after PORT_MAX_8250. Since the new drivers are coming the gap become shorter and shorter until the commit a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x") completely removed it. So, while type here is just a formality, make things a little bit more explicit for this driver and move port types to UAPI header. Note, it uses two types for now. Fixes: fddceb8b5399 ("tty: 8250: Add 64byte UART support for FSL platforms") Cc: Priyanka Jain Cc: Poonam Aggrwal Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 38bea3217ead..502aa23c7e15 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -57,7 +57,6 @@ #define PORT_RT2880 29 /* Ralink RT2880 internal UART */ #define PORT_16550A_FSL64 30 /* Freescale 16550 UART with 64 FIFOs */ #define PORT_DA830 31 /* TI DA8xx/66AK2x */ -#define PORT_MAX_8250 31 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed @@ -77,6 +76,10 @@ #define PORT_SUNZILOG 38 #define PORT_SUNSAB 39 +/* Intel EG20 */ +#define PORT_PCH_8LINE 44 +#define PORT_PCH_2LINE 45 + /* DEC */ #define PORT_DZ 46 #define PORT_ZS 47 -- cgit From 3f3dac7e4d815cb7f929c0ed98c3a45a86852e53 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Aug 2017 16:58:21 +0300 Subject: serial: Remove unused port type PORT_MFD is not in use since commit 1bd187de5364 ("x86, intel-mid: remove Intel MID specific serial support") Remove leftover. Fixes: 1bd187de5364 ("x86, intel-mid: remove Intel MID specific serial support") Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 502aa23c7e15..00d335634271 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -209,9 +209,6 @@ /* MAX310X */ #define PORT_MAX310X 94 -/* High Speed UART for Medfield */ -#define PORT_MFD 95 - /* TI OMAP-UART */ #define PORT_OMAP 96 -- cgit From ee1c90cc2cea80638f559c552371ee6893ca9d9e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Aug 2017 16:58:22 +0300 Subject: serial: Fix port type numbering for TI DA8xx The UAPI has a global list of unique numbers for different port types. The commit a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x") introduced a new port type and brought the collision with two other port types. Reuse 95 for it instead. Fixes: a2d6a987bfe4 ("serial: 8250: Add new port type for TI DA8xx/66AK2x") Cc: David Lechner Cc: Sekhar Nori Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 00d335634271..dc2d7cb766ab 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -56,7 +56,6 @@ #define PORT_ALTR_16550_F128 28 /* Altera 16550 UART with 128 FIFOs */ #define PORT_RT2880 29 /* Ralink RT2880 internal UART */ #define PORT_16550A_FSL64 30 /* Freescale 16550 UART with 64 FIFOs */ -#define PORT_DA830 31 /* TI DA8xx/66AK2x */ /* * ARM specific type numbers. These are not currently guaranteed @@ -209,6 +208,9 @@ /* MAX310X */ #define PORT_MAX310X 94 +/* TI DA8xx/66AK2x */ +#define PORT_DA830 95 + /* TI OMAP-UART */ #define PORT_OMAP 96 -- cgit From 6e0a5de2136b26bd0b3501f2f362abfffeb47d1e Mon Sep 17 00:00:00 2001 From: Rafael Gago Date: Mon, 31 Jul 2017 10:46:43 +0200 Subject: serial: 8250: Use hrtimers for rs485 delays Previously the timers where based on the classic timers, giving a too coarse resolution on systems with configs of less than 1000 HZ. This patch changes the rs485 timers to hrtimers. Signed-off-by: Rafael Gago Castano Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 61fbb440449c..a27ef5f56431 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -80,9 +80,10 @@ struct uart_8250_ops { }; struct uart_8250_em485 { - struct timer_list start_tx_timer; /* "rs485 start tx" timer */ - struct timer_list stop_tx_timer; /* "rs485 stop tx" timer */ - struct timer_list *active_timer; /* pointer to active timer */ + struct hrtimer start_tx_timer; /* "rs485 start tx" timer */ + struct hrtimer stop_tx_timer; /* "rs485 stop tx" timer */ + struct hrtimer *active_timer; /* pointer to active timer */ + struct uart_8250_port *port; /* for hrtimer callbacks */ }; /* -- cgit From 1c16ae65e2502da05310b2ec56b3a1fd3efe6f4d Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 21 Aug 2017 01:17:56 +0800 Subject: serial: 8250: of: Add new port type for MediaTek BTIF controller on MT7622/23 SoC MediaTek BTIF controller is the serial interface similar to UART but it works only as the digital device which is mainly used to communicate with the connectivity module called CONNSYS inside the SoC which could be mostly found on those MediaTek SoCs with Bluetooth feature such as MT7622 and MT7623 SoCs. And the controller is made as being compatible with the 8250 register layout with extra registers such as DMA enablement so it tends to be integrated with reusing 8250 OF driver. However, DMA mode is not being supported yet in the current driver. Signed-off-by: Sean Wang Suggested-by: Andy Shevchenko Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index dc2d7cb766ab..50d71c436323 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -274,4 +274,7 @@ /* MPS2 UART */ #define PORT_MPS2UART 116 +/* MediaTek BTIF */ +#define PORT_MTK_BTIF 117 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit From 2ce8008711e4837c11e99a94df55406085d0d098 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 2 Aug 2017 09:58:52 +0200 Subject: mcb: introduce mcb_get_resource() Introduce mcb_get_resource() as a common accessor to a mcb device's memory or IRQ resources. Signed-off-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 4097ac9ea13a..b1a0ad9d23b3 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -136,5 +136,7 @@ extern struct resource *mcb_request_mem(struct mcb_device *dev, const char *name); extern void mcb_release_mem(struct resource *mem); extern int mcb_get_irq(struct mcb_device *dev); +extern struct resource *mcb_get_resource(struct mcb_device *dev, + unsigned int type); #endif /* _LINUX_MCB_H */ -- cgit From d01c3289e7d68162e32bc08c2b65dd1a216a7ef8 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 18 Jul 2017 06:27:59 +0900 Subject: pty: show associative slave of ptmx in fdinfo This patch adds "tty-index" field to /proc/PID/fdinfo/N if N specifies /dev/ptmx. The field shows the index of associative slave pts. Though a minor number is given for each pts instance, ptmx is not. It means there is no way in user-space to know the association between file descriptors for pts/n and ptmx. (n = 0, 1, ...) This is different from pipe. About pipe such association can be solved by inode of pipefs. Providing the way to know the association between pts/n and ptmx helps users understand the status of running system. lsof can utilize this field. Signed-off-by: Masatake YAMATO Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 00b2213f6a35..fcdc0f5d9098 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -243,6 +243,7 @@ #include #include #include +#include struct tty_struct; struct tty_driver; @@ -285,6 +286,7 @@ struct tty_operations { int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); int (*get_icount)(struct tty_struct *tty, struct serial_icounter_struct *icount); + void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); int (*poll_get_char)(struct tty_driver *driver, int line); -- cgit From b6dccf7fae4331b0ea41cf087e3f02d5db9161dc Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Wed, 12 Jul 2017 16:10:40 +0530 Subject: nvme: add support for FW activation without reset This patch adds support for handling Fw activation without reset On completion of FW-activation-starting AER, all queues are paused till CSTS.PP is cleared or timed out (exceeds max time for fw activtion MTFA). If device fails to clear CSTS.PP within MTFA, driver issues reset controller. Signed-off-by: Arnav Dawn Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- include/linux/nvme.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 25d8225dbd04..5d0f2f4fc3b8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -146,6 +146,7 @@ enum { NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, + NVME_CSTS_PP = 1 << 5, NVME_CSTS_SHST_NORMAL = 0 << 2, NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, @@ -376,6 +377,13 @@ struct nvme_smart_log { __u8 rsvd216[296]; }; +struct nvme_fw_slot_info_log { + __u8 afi; + __u8 rsvd1[7]; + __le64 frs[7]; + __u8 rsvd64[448]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -386,6 +394,7 @@ enum { enum { NVME_AER_NOTICE_NS_CHANGED = 0x0002, + NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, }; struct nvme_lba_range_type { -- cgit From 62346eaeb2f1a0524b35eaa2f479596f40491165 Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Wed, 12 Jul 2017 16:11:53 +0530 Subject: nvme: define NVME_NSID_ALL Define the constant "0xffffffff" (used as nsid for all namespaces) as NVME_NSID_ALL. Signed-off-by: Arnav Dawn Signed-off-by: Sagi Grimberg --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5d0f2f4fc3b8..1d79046bf9d4 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -32,6 +32,8 @@ #define NVME_RDMA_IP_PORT 4420 +#define NVME_NSID_ALL 0xffffffff + enum nvme_subsys_type { NVME_NQN_DISC = 1, /* Discovery type target subsystem */ NVME_NQN_NVME = 2, /* NVME type target subsystem */ -- cgit From dbf86b39005d26b21c52a23720e15fb850d71cdc Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 16 Aug 2017 09:51:29 +0200 Subject: nvme: add support for NVMe 1.3 Timestamp Feature NVME's Timestamp feature allows controllers to be aware of the epoch time in milliseconds. This patch adds the set features hook for various transports through the identify path, so that resets and resumes can update the controller as necessary. Signed-off-by: Jon Derrick [hch: rebased on top of nvme-4.13 error handling changes, changed nvme_configure_timestamp to return the status] Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1d79046bf9d4..a12b47073273 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -254,6 +254,7 @@ enum { NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, + NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, @@ -688,6 +689,7 @@ enum { NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_AUTO_PST = 0x0c, NVME_FEAT_HOST_MEM_BUF = 0x0d, + NVME_FEAT_TIMESTAMP = 0x0e, NVME_FEAT_KATO = 0x0f, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, -- cgit From ad4e05b24c428d6125f6f10bd300600cae5079d4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Aug 2017 19:21:06 +0300 Subject: nvme: add symbolic constants for CC identifiers Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a12b47073273..7b4322dc2975 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -135,16 +135,22 @@ enum { enum { NVME_CC_ENABLE = 1 << 0, NVME_CC_CSS_NVM = 0 << 4, + NVME_CC_EN_SHIFT = 0, + NVME_CC_CSS_SHIFT = 4, NVME_CC_MPS_SHIFT = 7, - NVME_CC_ARB_RR = 0 << 11, - NVME_CC_ARB_WRRU = 1 << 11, - NVME_CC_ARB_VS = 7 << 11, - NVME_CC_SHN_NONE = 0 << 14, - NVME_CC_SHN_NORMAL = 1 << 14, - NVME_CC_SHN_ABRUPT = 2 << 14, - NVME_CC_SHN_MASK = 3 << 14, - NVME_CC_IOSQES = NVME_NVM_IOSQES << 16, - NVME_CC_IOCQES = NVME_NVM_IOCQES << 20, + NVME_CC_AMS_SHIFT = 11, + NVME_CC_SHN_SHIFT = 14, + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_ARB_RR = 0 << NVME_CC_AMS_SHIFT, + NVME_CC_ARB_WRRU = 1 << NVME_CC_AMS_SHIFT, + NVME_CC_ARB_VS = 7 << NVME_CC_AMS_SHIFT, + NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, -- cgit From 60b43f627a71aaf233ef5af90f72e207c29781b4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Aug 2017 19:21:07 +0300 Subject: nvme: rename AMS symbolic constants to fit specification Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7b4322dc2975..05560c2ecc83 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -142,9 +142,9 @@ enum { NVME_CC_SHN_SHIFT = 14, NVME_CC_IOSQES_SHIFT = 16, NVME_CC_IOCQES_SHIFT = 20, - NVME_CC_ARB_RR = 0 << NVME_CC_AMS_SHIFT, - NVME_CC_ARB_WRRU = 1 << NVME_CC_AMS_SHIFT, - NVME_CC_ARB_VS = 7 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, -- cgit From 48fa362b6c3f4d69bdb6310b46626049092475e0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 31 Jul 2017 13:21:14 -0700 Subject: nvmet-fc: simplify sg list handling The existing nvmet_fc sg list handling has 2 faults: a) the request between LLDD and transport has too large of an sg list (256 elements), which is normally 256k (64 elements). b) sglist handling doesn't optimize on the fact that each element is a page. This patch removes the static sg list in the request and uses the dynamic list already present in the nvmet_fc transport. It also simplies the handling of the sg list on multiple sequences to take advantage of the per-page divisions. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2591878c1d48..9c5cb4480806 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -624,7 +624,7 @@ struct nvmefc_tgt_fcp_req { u32 timeout; u32 transfer_length; struct fc_ba_rjt ba_rjt; - struct scatterlist sg[NVME_FC_MAX_SEGMENTS]; + struct scatterlist *sg; int sg_cnt; void *rspaddr; dma_addr_t rspdma; -- cgit From 01f33c336e2d298ea5d4ce5d6e5bcd12865cc30f Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 14 Aug 2017 22:12:38 +0200 Subject: string.h: add memcpy_and_pad() This helper function is useful for the nvme subsystem, and maybe others. Note: the warnings reported by the kbuild test robot for this patch are actually generated by the use of CONFIG_PROFILE_ALL_BRANCHES together with __FORTIFY_INLINE. Signed-off-by: Martin Wilck Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/string.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index a467e617eeb0..0bec4151b0eb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -200,6 +200,7 @@ static inline const char *kbasename(const char *path) void fortify_panic(const char *name) __noreturn __cold; void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); +void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) @@ -395,4 +396,33 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) #endif +/** + * memcpy_and_pad - Copy one buffer to another with padding + * @dest: Where to copy to + * @dest_len: The destination buffer size + * @src: Where to copy from + * @count: The number of bytes to copy + * @pad: Character to use for padding if space is left in destination. + */ +__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len, + const void *src, size_t count, int pad) +{ + size_t dest_size = __builtin_object_size(dest, 0); + size_t src_size = __builtin_object_size(src, 0); + + if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) { + if (dest_size < dest_len && dest_size < count) + __write_overflow(); + else if (src_size < dest_len && src_size < count) + __read_overflow3(); + } + if (dest_size < dest_len) + fortify_panic(__func__); + if (dest_len > count) { + memcpy(dest, src, count); + memset(dest + count, pad, dest_len - count); + } else + memcpy(dest, src, dest_len); +} + #endif /* _LINUX_STRING_H_ */ -- cgit From afaf59237843bf89823c33143beca6b262dff0ca Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:19 -0700 Subject: drm/syncobj: Rename fence_get to find_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function has far more in common with drm_syncobj_find than with any in the get/put functions. Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- include/drm/drm_syncobj.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 89976da542b1..7d4ad777132e 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -81,9 +81,9 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); -int drm_syncobj_fence_get(struct drm_file *file_private, - u32 handle, - struct dma_fence **fence); +int drm_syncobj_find_fence(struct drm_file *file_private, + u32 handle, + struct dma_fence **fence); void drm_syncobj_free(struct kref *kref); #endif -- cgit From 309a5482fa9eb7bc754bf95a2cd89091b01c33d2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:20 -0700 Subject: drm/syncobj: Add a race-free drm_syncobj_fence_get helper (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The atomic exchange operation in drm_syncobj_replace_fence is sufficient for the case where it races with itself. However, if you have a race between a replace_fence and dma_fence_get(syncobj->fence), you may end up with the entire replace_fence happening between the point in time where the one thread gets the syncobj->fence pointer and when it calls dma_fence_get() on it. If this happens, then the reference may be dropped before we get a chance to get a new one. The new helper uses dma_fence_get_rcu_safe to get rid of the race. This is also needed because it allows us to do a bit more than just get a reference in drm_syncobj_fence_get should we wish to do so. v2: - RCU isn't that scary - Call rcu_read_lock/unlock - Don't rename fence to _fence - Make the helper static inline Signed-off-by: Jason Ekstrand Acked-by: Christian König (v1) Signed-off-by: Dave Airlie --- include/drm/drm_syncobj.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 7d4ad777132e..ce94d14c5087 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -77,6 +77,18 @@ drm_syncobj_put(struct drm_syncobj *obj) kref_put(&obj->refcount, drm_syncobj_free); } +static inline struct dma_fence * +drm_syncobj_fence_get(struct drm_syncobj *syncobj) +{ + struct dma_fence *fence; + + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&syncobj->fence); + rcu_read_unlock(); + + return fence; +} + struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, -- cgit From 5e60a10eaebab93f823295cd7ec3848ba3b6e553 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 25 Aug 2017 10:52:22 -0700 Subject: drm/syncobj: add sync obj wait interface. (v8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interface will allow sync object to be used to back Vulkan fences. This API is pretty much the vulkan fence waiting API, and I've ported the code from amdgpu. v2: accept relative timeout, pass remaining time back to userspace. v3: return to absolute timeouts. v4: absolute zero = poll, rewrite any/all code to have same operation for arrays return -EINVAL for 0 fences. v4.1: fixup fences allocation check, use u64_to_user_ptr v5: move to sec/nsec, and use timespec64 for calcs. v6: use -ETIME and drop the out status flag. (-ETIME is suggested by ickle, I can feel a shed painting) v7: talked to Daniel/Arnd, use ktime and ns everywhere. v8: be more careful in the timeout calculations use uint32_t for counter variables so we don't overflow graciously handle -ENOINT being returned from dma_fence_wait_timeout Signed-off-by: Dave Airlie Reviewed-by: Jason Ekstrand Acked-by: Christian König Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 101593ab10ac..0757c1a41821 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -718,6 +718,17 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +851,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) /** * Device specific ioctls should only be in their respective headers -- cgit From 9c19fb10a5893d6501df4d0fb93d954d5fc1d91b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 07:39:25 -0700 Subject: drm/syncobj: Add a callback mechanism for replace_fence (v3) It is useful in certain circumstances to know when the fence is replaced in a syncobj. Specifically, it may be useful to know when the fence goes from NULL to something valid. This does make syncobj_replace_fence a little more expensive because it has to take a lock but, in the common case where there is no callback list, it spends a very short amount of time inside the lock. v2: - Don't lock in drm_syncobj_fence_get. We only really need to lock around fence_replace to make the callback work. v3: - Fix the cb_list comment to make kbuild happy Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- include/drm/drm_syncobj.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index ce94d14c5087..c00fee539822 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -28,6 +28,8 @@ #include "linux/dma-fence.h" +struct drm_syncobj_cb; + /** * struct drm_syncobj - sync object. * @@ -43,8 +45,21 @@ struct drm_syncobj { /** * @fence: * NULL or a pointer to the fence bound to this object. + * + * This field should not be used directly. Use drm_syncobj_fence_get + * and drm_syncobj_replace_fence instead. */ struct dma_fence *fence; + /** + * @cb_list: + * List of callbacks to call when the fence gets replaced + */ + struct list_head cb_list; + /** + * @lock: + * locks cb_list and write-locks fence. + */ + spinlock_t lock; /** * @file: * a file backing for this syncobj. @@ -52,6 +67,25 @@ struct drm_syncobj { struct file *file; }; +typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); + +/** + * struct drm_syncobj_cb - callback for drm_syncobj_add_callback + * @node: used by drm_syncob_add_callback to append this struct to + * syncobj::cb_list + * @func: drm_syncobj_func_t to call + * + * This struct will be initialized by drm_syncobj_add_callback, additional + * data can be passed along by embedding drm_syncobj_cb in another struct. + * The callback will get called the next time drm_syncobj_replace_fence is + * called. + */ +struct drm_syncobj_cb { + struct list_head node; + drm_syncobj_func_t func; +}; + void drm_syncobj_free(struct kref *kref); /** @@ -91,6 +125,11 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj) struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle); +void drm_syncobj_add_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func); +void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, + struct drm_syncobj_cb *cb); void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *fence); int drm_syncobj_find_fence(struct drm_file *file_private, -- cgit From 1fc08218ed2a42c86af5c905fe4c00885376a07e Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:25 -0700 Subject: drm/syncobj: Add a CREATE_SIGNALED flag This requests that the driver create the sync object such that it already has a signaled dma_fence attached. Because we don't need anything in particular (just something signaled), we use a dummy null fence. This is useful for Vulkan which has a similar flag that can be passed to vkCreateFence. Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 0757c1a41821..ade7f68d32b5 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; -- cgit From e7aca5031a2fb51b6120864d0eff5478c95e6651 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Aug 2017 10:52:24 -0700 Subject: drm/syncobj: Allow wait for submit and signal behavior (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vulkan VkFence semantics require that the application be able to perform a CPU wait on work which may not yet have been submitted. This is perfectly safe because the CPU wait has a timeout which will get triggered eventually if no work is ever submitted. This behavior is advantageous for multi-threaded workloads because, so long as all of the threads agree on what fences to use up-front, you don't have the extra cross-thread synchronization cost of thread A telling thread B that it has submitted its dependent work and thread B is now free to wait. Within a single process, this can be implemented in the userspace driver by doing exactly the same kind of tracking the app would have to do using posix condition variables or similar. However, in order for this to work cross-process (as is required by VK_KHR_external_fence), we need to handle this in the kernel. This commit adds a WAIT_FOR_SUBMIT flag to DRM_IOCTL_SYNCOBJ_WAIT which instructs the IOCTL to wait for the syncobj to have a non-null fence and then wait on the fence. Combined with DRM_IOCTL_SYNCOBJ_RESET, you can easily get the Vulkan behavior. v2: - Fix a bug in the invalid syncobj error path - Unify the wait-all and wait-any cases v3: - Unify the timeout == 0 case a bit with the timeout > 0 case - Use wait_event_interruptible_timeout v4: - Use proxy fence v5: - Revert to a combination of v2 and v3 - Don't use proxy fences - Don't use wait_event_interruptible_timeout because it just adds an extra layer of callbacks Signed-off-by: Jason Ekstrand Cc: Dave Airlie Cc: Chris Wilson Cc: Christian König Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index ade7f68d32b5..4c746597225e 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -720,6 +720,7 @@ struct drm_syncobj_handle { }; #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ -- cgit From 1a66a836da630cd70f3639208da549b549ce576b Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 25 Aug 2017 09:21:28 -0700 Subject: gre: add collect_md mode to ERSPAN tunnel Similar to gre, vxlan, geneve, ipip tunnels, allow ERSPAN tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. OVS can use it as well in the future. Signed-off-by: William Tu Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 625c29329372..992652856fe8 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -154,8 +154,10 @@ struct ip_tunnel { #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) +#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) -#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) +#define TUNNEL_OPTIONS_PRESENT \ + (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) struct tnl_ptk_info { __be16 flags; -- cgit From 4e587ea71bf924f7dac621f1351653bd41e446cb Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Aug 2017 15:03:10 -0700 Subject: ipv6: fix sparse warning on rt6i_node Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This generates a new sparse warning on rt->rt6i_node related code: net/ipv6/route.c:1394:30: error: incompatible types in comparison expression (different address spaces) ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison expression (different address spaces) This commit adds "__rcu" tag for rt6i_node and makes sure corresponding rcu API is used for it. After this fix, sparse no longer generates the above warning. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e9c59db92942..af509f801084 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -105,7 +105,7 @@ struct rt6_info { * the same cache line. */ struct fib6_table *rt6i_table; - struct fib6_node *rt6i_node; + struct fib6_node __rcu *rt6i_node; struct in6_addr rt6i_gateway; -- cgit From 4734b4f417126e8773b3983122ca935d02af80de Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:45 -0700 Subject: IB/rdmavt: Add QP iterator API for QPs There are currently 3 spots in the qib and hfi1 driver that have knowledge of the internal QP hash list that should only be in scope to rdmavt QP code. Add an iterator API for processing all QPs to hide the nature of the RCU hashlist. The API consists of: - rvt_qp_iter_init() * For iterating QPs one at a time for seq_file semantics - rvt_qp_iter_next() * For iterating QPs one at a time for seq_file semantics - rvt_qp_iter() * For iterating all QPs The first two are used for things like seq_file prints. The last is for code that just needs to iterate all QPs in the system. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_qp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 8fbafb0ce674..dfeb311c30a1 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -673,4 +673,33 @@ void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp); void rvt_add_retry_timer(struct rvt_qp *qp); +/** + * struct rvt_qp_iter - the iterator for QPs + * @qp - the current QP + * + * This structure defines the current iterator + * state for sequenced access to all QPs relative + * to an rvt_dev_info. + */ +struct rvt_qp_iter { + struct rvt_qp *qp; + /* private: backpointer */ + struct rvt_dev_info *rdi; + /* private: callback routine */ + void (*cb)(struct rvt_qp *qp, u64 v); + /* private: for arg to callback routine */ + u64 v; + /* private: number of SMI,GSI QPs for device */ + int specials; + /* private: current iterator index */ + int n; +}; + +struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)); +int rvt_qp_iter_next(struct rvt_qp_iter *iter); +void rvt_qp_iter(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit From 0208da90def5776cef940f9de4ffe6ecef346207 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:24:10 -0700 Subject: IB/rdmavt: Handle dereg of inuse MRs properly A destroy of an MR prior to destroying the QP can cause the following diagnostic if the QP is referencing the MR being de-registered: hfi1 0000:05:00.0: hfi1_0: rvt_dereg_mr timeout mr ffff8808562108 00 pd ffff880859b20b00 The solution is to when the a non-zero refcount is encountered when the MR is destroyed the QPs needs to be iterated looking for QPs in the same PD as the MR. If rvt_qp_mr_clean() detects any such QP references the rkey/lkey, the QP needs to be put into an error state via a call to rvt_qp_error() which will trigger the clean up of any stuck references. This solution is as specified in IBTA 1.3 Volume 1 11.2.10.5. [This is reproduced with the 0.4.9 version of qperf and the rc_bw test] Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- include/rdma/rdmavt_mr.h | 3 +++ include/rdma/rdmavt_qp.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index f418bd5571a5..72a3856d4057 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -191,4 +191,7 @@ static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length, } } +bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey); +bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey); + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index dfeb311c30a1..0eed3d8752fa 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -702,4 +702,5 @@ int rvt_qp_iter_next(struct rvt_qp_iter *iter); void rvt_qp_iter(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); +void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit From b339752d054fb32863418452dff350a1086885b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 28 Aug 2017 14:51:27 -0700 Subject: cpumask: fix spurious cpumask_of_node() on non-NUMA multi-node configs When !NUMA, cpumask_of_node(@node) equals cpu_online_mask regardless of @node. The assumption seems that if !NUMA, there shouldn't be more than one node and thus reporting cpu_online_mask regardless of @node is correct. However, that assumption was broken years ago to support DISCONTIGMEM and whether a system has multiple nodes or not is separately controlled by NEED_MULTIPLE_NODES. This means that, on a system with !NUMA && NEED_MULTIPLE_NODES, cpumask_of_node() will report cpu_online_mask for all possible nodes, indicating that the CPUs are associated with multiple nodes which is an impossible configuration. This bug has been around forever but doesn't look like it has caused any noticeable symptoms. However, it triggers a WARN recently added to workqueue to verify NUMA affinity configuration. Fix it by reporting empty cpumask on non-zero nodes if !NUMA. Signed-off-by: Tejun Heo Reported-and-tested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/asm-generic/topology.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index fc824e2828f3..5d2add1a6c96 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -48,7 +48,11 @@ #define parent_node(node) ((void)(node),0) #endif #ifndef cpumask_of_node -#define cpumask_of_node(node) ((void)node, cpu_online_mask) + #ifdef CONFIG_NEED_MULTIPLE_NODES + #define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask) + #else + #define cpumask_of_node(node) ((void)node, cpu_online_mask) + #endif #endif #ifndef pcibus_to_node #define pcibus_to_node(bus) ((void)(bus), -1) -- cgit From 5aa5911a0ed9355ebb02f83de4be9ba435a45a2c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 23 Aug 2017 16:54:43 -0600 Subject: ACPI / blacklist: add acpi_match_platform_list() ACPI OEM ID / OEM Table ID / Revision can be used to identify a platform based on ACPI firmware info. acpi_blacklisted(), intel_pstate_platform_pwr_mgmt_exists(), and some other funcs, have been using similar check to detect a list of platforms that require special handlings. Move the platform check in acpi_blacklisted() to a new common utility function, acpi_match_platform_list(), so that other drivers do not have to implement their own version. There is no change in functionality. Signed-off-by: Toshi Kani Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 27b4b6615263..1c8a97d8b09b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -556,6 +556,25 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, #define ACPI_OST_SC_DRIVER_LOAD_FAILURE 0x81 #define ACPI_OST_SC_INSERT_NOT_SUPPORTED 0x82 +enum acpi_predicate { + all_versions, + less_than_or_equal, + equal, + greater_than_or_equal, +}; + +/* Table must be terminted by a NULL entry */ +struct acpi_platform_list { + char oem_id[ACPI_OEM_ID_SIZE+1]; + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE+1]; + u32 oem_revision; + char *table; + enum acpi_predicate pred; + char *reason; + u32 data; +}; +int acpi_match_platform_list(const struct acpi_platform_list *plat); + extern void acpi_early_init(void); extern void acpi_subsystem_init(void); -- cgit From 5bf916ee0ab638c86edeaf4caeeade9ddf44d95d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 27 Aug 2017 17:03:33 +0200 Subject: irda: move include/net/irda into staging subdirectory And finally, move the irda include files into drivers/staging/irda/include/net/irda. Yes, it's a long path, but it makes it easy for us to just add a Makefile directory path addition and all of the net and drivers code "just works". Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/net/irda/af_irda.h | 87 ---------- include/net/irda/crc.h | 29 ---- include/net/irda/discovery.h | 95 ----------- include/net/irda/ircomm_core.h | 106 ------------ include/net/irda/ircomm_event.h | 83 ---------- include/net/irda/ircomm_lmp.h | 36 ---- include/net/irda/ircomm_param.h | 147 ----------------- include/net/irda/ircomm_ttp.h | 37 ----- include/net/irda/ircomm_tty.h | 121 -------------- include/net/irda/ircomm_tty_attach.h | 92 ----------- include/net/irda/irda.h | 115 ------------- include/net/irda/irda_device.h | 285 -------------------------------- include/net/irda/iriap.h | 108 ------------ include/net/irda/iriap_event.h | 85 ---------- include/net/irda/irias_object.h | 108 ------------ include/net/irda/irlan_client.h | 42 ----- include/net/irda/irlan_common.h | 230 -------------------------- include/net/irda/irlan_eth.h | 32 ---- include/net/irda/irlan_event.h | 81 --------- include/net/irda/irlan_filter.h | 35 ---- include/net/irda/irlan_provider.h | 52 ------ include/net/irda/irlap.h | 311 ----------------------------------- include/net/irda/irlap_event.h | 129 --------------- include/net/irda/irlap_frame.h | 167 ------------------- include/net/irda/irlmp.h | 295 --------------------------------- include/net/irda/irlmp_event.h | 98 ----------- include/net/irda/irlmp_frame.h | 62 ------- include/net/irda/irmod.h | 109 ------------ include/net/irda/irqueue.h | 96 ----------- include/net/irda/irttp.h | 210 ----------------------- include/net/irda/parameters.h | 100 ----------- include/net/irda/qos.h | 101 ------------ include/net/irda/timer.h | 105 ------------ include/net/irda/wrapper.h | 58 ------- 34 files changed, 3847 deletions(-) delete mode 100644 include/net/irda/af_irda.h delete mode 100644 include/net/irda/crc.h delete mode 100644 include/net/irda/discovery.h delete mode 100644 include/net/irda/ircomm_core.h delete mode 100644 include/net/irda/ircomm_event.h delete mode 100644 include/net/irda/ircomm_lmp.h delete mode 100644 include/net/irda/ircomm_param.h delete mode 100644 include/net/irda/ircomm_ttp.h delete mode 100644 include/net/irda/ircomm_tty.h delete mode 100644 include/net/irda/ircomm_tty_attach.h delete mode 100644 include/net/irda/irda.h delete mode 100644 include/net/irda/irda_device.h delete mode 100644 include/net/irda/iriap.h delete mode 100644 include/net/irda/iriap_event.h delete mode 100644 include/net/irda/irias_object.h delete mode 100644 include/net/irda/irlan_client.h delete mode 100644 include/net/irda/irlan_common.h delete mode 100644 include/net/irda/irlan_eth.h delete mode 100644 include/net/irda/irlan_event.h delete mode 100644 include/net/irda/irlan_filter.h delete mode 100644 include/net/irda/irlan_provider.h delete mode 100644 include/net/irda/irlap.h delete mode 100644 include/net/irda/irlap_event.h delete mode 100644 include/net/irda/irlap_frame.h delete mode 100644 include/net/irda/irlmp.h delete mode 100644 include/net/irda/irlmp_event.h delete mode 100644 include/net/irda/irlmp_frame.h delete mode 100644 include/net/irda/irmod.h delete mode 100644 include/net/irda/irqueue.h delete mode 100644 include/net/irda/irttp.h delete mode 100644 include/net/irda/parameters.h delete mode 100644 include/net/irda/qos.h delete mode 100644 include/net/irda/timer.h delete mode 100644 include/net/irda/wrapper.h (limited to 'include') diff --git a/include/net/irda/af_irda.h b/include/net/irda/af_irda.h deleted file mode 100644 index 0df574931522..000000000000 --- a/include/net/irda/af_irda.h +++ /dev/null @@ -1,87 +0,0 @@ -/********************************************************************* - * - * Filename: af_irda.h - * Version: 1.0 - * Description: IrDA sockets declarations - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef AF_IRDA_H -#define AF_IRDA_H - -#include -#include -#include /* struct iriap_cb */ -#include /* struct ias_value */ -#include /* struct lsap_cb */ -#include /* struct tsap_cb */ -#include /* struct discovery_t */ -#include - -/* IrDA Socket */ -struct irda_sock { - /* struct sock has to be the first member of irda_sock */ - struct sock sk; - __u32 saddr; /* my local address */ - __u32 daddr; /* peer address */ - - struct lsap_cb *lsap; /* LSAP used by Ultra */ - __u8 pid; /* Protocol IP (PID) used by Ultra */ - - struct tsap_cb *tsap; /* TSAP used by this connection */ - __u8 dtsap_sel; /* remote TSAP address */ - __u8 stsap_sel; /* local TSAP address */ - - __u32 max_sdu_size_rx; - __u32 max_sdu_size_tx; - __u32 max_data_size; - __u8 max_header_size; - struct qos_info qos_tx; - - __u16_host_order mask; /* Hint bits mask */ - __u16_host_order hints; /* Hint bits */ - - void *ckey; /* IrLMP client handle */ - void *skey; /* IrLMP service handle */ - - struct ias_object *ias_obj; /* Our service name + lsap in IAS */ - struct iriap_cb *iriap; /* Used to query remote IAS */ - struct ias_value *ias_result; /* Result of remote IAS query */ - - hashbin_t *cachelog; /* Result of discovery query */ - __u32 cachedaddr; /* Result of selective discovery query */ - - int nslots; /* Number of slots to use for discovery */ - - int errno; /* status of the IAS query */ - - wait_queue_head_t query_wait; /* Wait for the answer to a query */ - struct timer_list watchdog; /* Timeout for discovery */ - - LOCAL_FLOW tx_flow; - LOCAL_FLOW rx_flow; -}; - -static inline struct irda_sock *irda_sk(struct sock *sk) -{ - return (struct irda_sock *)sk; -} - -#endif /* AF_IRDA_H */ diff --git a/include/net/irda/crc.h b/include/net/irda/crc.h deleted file mode 100644 index f202296df9bb..000000000000 --- a/include/net/irda/crc.h +++ /dev/null @@ -1,29 +0,0 @@ -/********************************************************************* - * - * Filename: crc.h - * Version: - * Description: CRC routines - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun May 2 20:25:23 1999 - * Modified by: Dag Brattli - * - ********************************************************************/ - -#ifndef IRDA_CRC_H -#define IRDA_CRC_H - -#include -#include - -#define INIT_FCS 0xffff /* Initial FCS value */ -#define GOOD_FCS 0xf0b8 /* Good final FCS value */ - -/* Recompute the FCS with one more character appended. */ -#define irda_fcs(fcs, c) crc_ccitt_byte(fcs, c) - -/* Recompute the FCS with len bytes appended. */ -#define irda_calc_crc16(fcs, buf, len) crc_ccitt(fcs, buf, len) - -#endif diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h deleted file mode 100644 index 63ae32530567..000000000000 --- a/include/net/irda/discovery.h +++ /dev/null @@ -1,95 +0,0 @@ -/********************************************************************* - * - * Filename: discovery.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 6 16:53:53 1999 - * Modified at: Tue Oct 5 10:05:10 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef DISCOVERY_H -#define DISCOVERY_H - -#include - -#include -#include /* irda_queue_t */ -#include /* LAP_REASON */ - -#define DISCOVERY_EXPIRE_TIMEOUT (2*sysctl_discovery_timeout*HZ) -#define DISCOVERY_DEFAULT_SLOTS 0 - -/* - * This type is used by the protocols that transmit 16 bits words in - * little endian format. A little endian machine stores MSB of word in - * byte[1] and LSB in byte[0]. A big endian machine stores MSB in byte[0] - * and LSB in byte[1]. - * - * This structure is used in the code for things that are endian neutral - * but that fit in a word so that we can manipulate them efficiently. - * By endian neutral, I mean things that are really an array of bytes, - * and always used as such, for example the hint bits. Jean II - */ -typedef union { - __u16 word; - __u8 byte[2]; -} __u16_host_order; - -/* Types of discovery */ -typedef enum { - DISCOVERY_LOG, /* What's in our discovery log */ - DISCOVERY_ACTIVE, /* Doing our own discovery on the medium */ - DISCOVERY_PASSIVE, /* Peer doing discovery on the medium */ - EXPIRY_TIMEOUT, /* Entry expired due to timeout */ -} DISCOVERY_MODE; - -#define NICKNAME_MAX_LEN 21 - -/* Basic discovery information about a peer */ -typedef struct irda_device_info discinfo_t; /* linux/irda.h */ - -/* - * The DISCOVERY structure is used for both discovery requests and responses - */ -typedef struct discovery_t { - irda_queue_t q; /* Must be first! */ - - discinfo_t data; /* Basic discovery information */ - int name_len; /* Length of nickname */ - - LAP_REASON condition; /* More info about the discovery */ - int gen_addr_bit; /* Need to generate a new device - * address? */ - int nslots; /* Number of slots to use when - * discovering */ - unsigned long timestamp; /* Last time discovered */ - unsigned long firststamp; /* First time discovered */ -} discovery_t; - -void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *discovery); -void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log); -void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force); -struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, - __u16 mask, int old_entries); - -#endif diff --git a/include/net/irda/ircomm_core.h b/include/net/irda/ircomm_core.h deleted file mode 100644 index 2a580ce9edad..000000000000 --- a/include/net/irda/ircomm_core.h +++ /dev/null @@ -1,106 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_core.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 08:58:43 1999 - * Modified at: Mon Dec 13 11:52:29 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_CORE_H -#define IRCOMM_CORE_H - -#include -#include -#include - -#define IRCOMM_MAGIC 0x98347298 -#define IRCOMM_HEADER_SIZE 1 - -struct ircomm_cb; /* Forward decl. */ - -/* - * A small call-table, so we don't have to check the service-type whenever - * we want to do something - */ -typedef struct { - int (*data_request)(struct ircomm_cb *, struct sk_buff *, int clen); - int (*connect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); - int (*connect_response)(struct ircomm_cb *, struct sk_buff *); - int (*disconnect_request)(struct ircomm_cb *, struct sk_buff *, - struct ircomm_info *); -} call_t; - -struct ircomm_cb { - irda_queue_t queue; - magic_t magic; - - notify_t notify; - call_t issue; - - int state; - int line; /* Which TTY line we are using */ - - struct tsap_cb *tsap; - struct lsap_cb *lsap; - - __u8 dlsap_sel; /* Destination LSAP/TSAP selector */ - __u8 slsap_sel; /* Source LSAP/TSAP selector */ - - __u32 saddr; /* Source device address (link we are using) */ - __u32 daddr; /* Destination device address */ - - int max_header_size; /* Header space we must reserve for each frame */ - int max_data_size; /* The amount of data we can fill in each frame */ - - LOCAL_FLOW flow_status; /* Used by ircomm_lmp */ - int pkt_count; /* Number of frames we have sent to IrLAP */ - - __u8 service_type; -}; - -extern hashbin_t *ircomm; - -struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line); -int ircomm_close(struct ircomm_cb *self); - -int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb); -void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb); -int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, struct sk_buff *skb, - __u8 service_type); -void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata); -int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata); -void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info); -void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow); - -#define ircomm_is_connected(self) (self->state == IRCOMM_CONN) - -#endif diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h deleted file mode 100644 index 5bbc32998d57..000000000000 --- a/include/net/irda/ircomm_event.h +++ /dev/null @@ -1,83 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:51:13 1999 - * Modified at: Thu Jun 10 08:36:25 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_EVENT_H -#define IRCOMM_EVENT_H - -#include - -typedef enum { - IRCOMM_IDLE, - IRCOMM_WAITI, - IRCOMM_WAITR, - IRCOMM_CONN, -} IRCOMM_STATE; - -/* IrCOMM Events */ -typedef enum { - IRCOMM_CONNECT_REQUEST, - IRCOMM_CONNECT_RESPONSE, - IRCOMM_TTP_CONNECT_INDICATION, - IRCOMM_LMP_CONNECT_INDICATION, - IRCOMM_TTP_CONNECT_CONFIRM, - IRCOMM_LMP_CONNECT_CONFIRM, - - IRCOMM_LMP_DISCONNECT_INDICATION, - IRCOMM_TTP_DISCONNECT_INDICATION, - IRCOMM_DISCONNECT_REQUEST, - - IRCOMM_TTP_DATA_INDICATION, - IRCOMM_LMP_DATA_INDICATION, - IRCOMM_DATA_REQUEST, - IRCOMM_CONTROL_REQUEST, - IRCOMM_CONTROL_INDICATION, -} IRCOMM_EVENT; - -/* - * Used for passing information through the state-machine - */ -struct ircomm_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; - LM_REASON reason; /* Reason for disconnect */ - __u32 max_data_size; - __u32 max_header_size; - - struct qos_info *qos; -}; - -extern const char *const ircomm_state[]; - -struct ircomm_cb; /* Forward decl. */ - -int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state); - -#endif diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h deleted file mode 100644 index 5042a5021a04..000000000000 --- a/include/net/irda/ircomm_lmp.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_lmp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:32 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_LMP_H -#define IRCOMM_LMP_H - -#include - -int ircomm_open_lsap(struct ircomm_cb *self); - -#endif diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h deleted file mode 100644 index 1f67432321c4..000000000000 --- a/include/net/irda/ircomm_param.h +++ /dev/null @@ -1,147 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_param.h - * Version: 1.0 - * Description: Parameter handling for the IrCOMM protocol - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Wed Aug 25 13:46:33 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_PARAMS_H -#define IRCOMM_PARAMS_H - -#include - -/* Parameters common to all service types */ -#define IRCOMM_SERVICE_TYPE 0x00 -#define IRCOMM_PORT_TYPE 0x01 /* Only used in LM-IAS */ -#define IRCOMM_PORT_NAME 0x02 /* Only used in LM-IAS */ - -/* Parameters for both 3 wire and 9 wire */ -#define IRCOMM_DATA_RATE 0x10 -#define IRCOMM_DATA_FORMAT 0x11 -#define IRCOMM_FLOW_CONTROL 0x12 -#define IRCOMM_XON_XOFF 0x13 -#define IRCOMM_ENQ_ACK 0x14 -#define IRCOMM_LINE_STATUS 0x15 -#define IRCOMM_BREAK 0x16 - -/* Parameters for 9 wire */ -#define IRCOMM_DTE 0x20 -#define IRCOMM_DCE 0x21 -#define IRCOMM_POLL 0x22 - -/* Service type (details) */ -#define IRCOMM_3_WIRE_RAW 0x01 -#define IRCOMM_3_WIRE 0x02 -#define IRCOMM_9_WIRE 0x04 -#define IRCOMM_CENTRONICS 0x08 - -/* Port type (details) */ -#define IRCOMM_SERIAL 0x00 -#define IRCOMM_PARALLEL 0x01 - -/* Data format (details) */ -#define IRCOMM_WSIZE_5 0x00 -#define IRCOMM_WSIZE_6 0x01 -#define IRCOMM_WSIZE_7 0x02 -#define IRCOMM_WSIZE_8 0x03 - -#define IRCOMM_1_STOP_BIT 0x00 -#define IRCOMM_2_STOP_BIT 0x04 /* 1.5 if char len 5 */ - -#define IRCOMM_PARITY_DISABLE 0x00 -#define IRCOMM_PARITY_ENABLE 0x08 - -#define IRCOMM_PARITY_ODD 0x00 -#define IRCOMM_PARITY_EVEN 0x10 -#define IRCOMM_PARITY_MARK 0x20 -#define IRCOMM_PARITY_SPACE 0x30 - -/* Flow control */ -#define IRCOMM_XON_XOFF_IN 0x01 -#define IRCOMM_XON_XOFF_OUT 0x02 -#define IRCOMM_RTS_CTS_IN 0x04 -#define IRCOMM_RTS_CTS_OUT 0x08 -#define IRCOMM_DSR_DTR_IN 0x10 -#define IRCOMM_DSR_DTR_OUT 0x20 -#define IRCOMM_ENQ_ACK_IN 0x40 -#define IRCOMM_ENQ_ACK_OUT 0x80 - -/* Line status */ -#define IRCOMM_OVERRUN_ERROR 0x02 -#define IRCOMM_PARITY_ERROR 0x04 -#define IRCOMM_FRAMING_ERROR 0x08 - -/* DTE (Data terminal equipment) line settings */ -#define IRCOMM_DELTA_DTR 0x01 -#define IRCOMM_DELTA_RTS 0x02 -#define IRCOMM_DTR 0x04 -#define IRCOMM_RTS 0x08 - -/* DCE (Data communications equipment) line settings */ -#define IRCOMM_DELTA_CTS 0x01 /* Clear to send has changed */ -#define IRCOMM_DELTA_DSR 0x02 /* Data set ready has changed */ -#define IRCOMM_DELTA_RI 0x04 /* Ring indicator has changed */ -#define IRCOMM_DELTA_CD 0x08 /* Carrier detect has changed */ -#define IRCOMM_CTS 0x10 /* Clear to send is high */ -#define IRCOMM_DSR 0x20 /* Data set ready is high */ -#define IRCOMM_RI 0x40 /* Ring indicator is high */ -#define IRCOMM_CD 0x80 /* Carrier detect is high */ -#define IRCOMM_DCE_DELTA_ANY 0x0f - -/* - * Parameter state - */ -struct ircomm_params { - /* General control params */ - __u8 service_type; - __u8 port_type; - char port_name[32]; - - /* Control params for 3- and 9-wire service type */ - __u32 data_rate; /* Data rate in bps */ - __u8 data_format; - __u8 flow_control; - char xonxoff[2]; - char enqack[2]; - __u8 line_status; - __u8 _break; - - __u8 null_modem; - - /* Control params for 9-wire service type */ - __u8 dte; - __u8 dce; - __u8 poll; - - /* Control params for Centronics service type */ -}; - -struct ircomm_tty_cb; /* Forward decl. */ - -int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush); - -extern pi_param_info_t ircomm_param_info; - -#endif /* IRCOMM_PARAMS_H */ - diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h deleted file mode 100644 index c5627288bca3..000000000000 --- a/include/net/irda/ircomm_ttp.h +++ /dev/null @@ -1,37 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_ttp.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 10:06:07 1999 - * Modified at: Fri Aug 13 07:32:22 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTP_H -#define IRCOMM_TTP_H - -#include - -int ircomm_open_tsap(struct ircomm_cb *self); - -#endif - diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h deleted file mode 100644 index 8d4f588974bc..000000000000 --- a/include/net/irda/ircomm_tty.h +++ /dev/null @@ -1,121 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Jun 6 23:24:22 1999 - * Modified at: Fri Jan 28 13:16:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_H -#define IRCOMM_TTY_H - -#include -#include -#include -#include /* struct tty_struct */ - -#include -#include -#include - -#define IRCOMM_TTY_PORTS 32 -#define IRCOMM_TTY_MAGIC 0x3432 -#define IRCOMM_TTY_MAJOR 161 -#define IRCOMM_TTY_MINOR 0 - -/* This is used as an initial value to max_header_size before the proper - * value is filled in (5 for ttp, 4 for lmp). This allow us to detect - * the state of the underlying connection. - Jean II */ -#define IRCOMM_TTY_HDR_UNINITIALISED 16 -/* Same for payload size. See qos.c for the smallest max data size */ -#define IRCOMM_TTY_DATA_UNINITIALISED (64 - IRCOMM_TTY_HDR_UNINITIALISED) - -/* - * IrCOMM TTY driver state - */ -struct ircomm_tty_cb { - irda_queue_t queue; /* Must be first */ - struct tty_port port; - magic_t magic; - - int state; /* Connect state */ - - struct ircomm_cb *ircomm; /* IrCOMM layer instance */ - - struct sk_buff *tx_skb; /* Transmit buffer */ - struct sk_buff *ctrl_skb; /* Control data buffer */ - - /* Parameters */ - struct ircomm_params settings; - - __u8 service_type; /* The service that we support */ - int client; /* True if we are a client */ - LOCAL_FLOW flow; /* IrTTP flow status */ - - int line; - - __u8 dlsap_sel; - __u8 slsap_sel; - - __u32 saddr; - __u32 daddr; - - __u32 max_data_size; /* Max data we can transmit in one packet */ - __u32 max_header_size; /* The amount of header space we must reserve */ - __u32 tx_data_size; /* Max data size of current tx_skb */ - - struct iriap_cb *iriap; /* Instance used for querying remote IAS */ - struct ias_object* obj; - void *skey; - void *ckey; - - struct timer_list watchdog_timer; - struct work_struct tqueue; - - /* Protect concurent access to : - * o self->ctrl_skb - * o self->tx_skb - * Maybe other things may gain to be protected as well... - * Jean II */ - spinlock_t spinlock; -}; - -void ircomm_tty_start(struct tty_struct *tty); -void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self); - -int ircomm_tty_tiocmget(struct tty_struct *tty); -int ircomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, - unsigned int clear); -int ircomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, - unsigned long arg); -void ircomm_tty_set_termios(struct tty_struct *tty, - struct ktermios *old_termios); - -#endif - - - - - - - diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h deleted file mode 100644 index 20dcbdf258cf..000000000000 --- a/include/net/irda/ircomm_tty_attach.h +++ /dev/null @@ -1,92 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty_attach.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Wed Jun 9 15:55:18 1999 - * Modified at: Fri Dec 10 21:04:55 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRCOMM_TTY_ATTACH_H -#define IRCOMM_TTY_ATTACH_H - -#include - -typedef enum { - IRCOMM_TTY_IDLE, - IRCOMM_TTY_SEARCH, - IRCOMM_TTY_QUERY_PARAMETERS, - IRCOMM_TTY_QUERY_LSAP_SEL, - IRCOMM_TTY_SETUP, - IRCOMM_TTY_READY, -} IRCOMM_TTY_STATE; - -/* IrCOMM TTY Events */ -typedef enum { - IRCOMM_TTY_ATTACH_CABLE, - IRCOMM_TTY_DETACH_CABLE, - IRCOMM_TTY_DATA_REQUEST, - IRCOMM_TTY_DATA_INDICATION, - IRCOMM_TTY_DISCOVERY_REQUEST, - IRCOMM_TTY_DISCOVERY_INDICATION, - IRCOMM_TTY_CONNECT_CONFIRM, - IRCOMM_TTY_CONNECT_INDICATION, - IRCOMM_TTY_DISCONNECT_REQUEST, - IRCOMM_TTY_DISCONNECT_INDICATION, - IRCOMM_TTY_WD_TIMER_EXPIRED, - IRCOMM_TTY_GOT_PARAMETERS, - IRCOMM_TTY_GOT_LSAPSEL, -} IRCOMM_TTY_EVENT; - -/* Used for passing information through the state-machine */ -struct ircomm_tty_info { - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - __u8 dlsap_sel; -}; - -extern const char *const ircomm_state[]; -extern const char *const ircomm_tty_state[]; - -int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, - struct sk_buff *skb, struct ircomm_tty_info *info); - - -int ircomm_tty_attach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_detach_cable(struct ircomm_tty_cb *self); -void ircomm_tty_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -void ircomm_tty_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); -void ircomm_tty_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self); -void ircomm_tty_link_established(struct ircomm_tty_cb *self); - -#endif /* IRCOMM_TTY_ATTACH_H */ diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h deleted file mode 100644 index 92c8fb575213..000000000000 --- a/include/net/irda/irda.h +++ /dev/null @@ -1,115 +0,0 @@ -/********************************************************************* - * - * Filename: irda.h - * Version: 1.0 - * Description: IrDA common include file for kernel internal use - * Status: Stable - * Author: Dag Brattli - * Created at: Tue Dec 9 21:13:12 1997 - * Modified at: Fri Jan 28 13:16:32 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef NET_IRDA_H -#define NET_IRDA_H - -#include /* struct sk_buff */ -#include -#include /* sa_family_t in */ -#include - -typedef __u32 magic_t; - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -/* Hack to do small backoff when setting media busy in IrLAP */ -#ifndef SMALL -#define SMALL 5 -#endif - -#ifndef IRDA_MIN /* Lets not mix this MIN with other header files */ -#define IRDA_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef IRDA_ALIGN -# define IRDA_ALIGN __attribute__((aligned)) -#endif - -#ifdef CONFIG_IRDA_DEBUG -#define IRDA_ASSERT(expr, func) \ -do { if(!(expr)) { \ - printk( "Assertion failed! %s:%s:%d %s\n", \ - __FILE__,__func__,__LINE__,(#expr) ); \ - func } } while (0) -#define IRDA_ASSERT_LABEL(label) label -#else -#define IRDA_ASSERT(expr, func) do { (void)(expr); } while (0) -#define IRDA_ASSERT_LABEL(label) -#endif /* CONFIG_IRDA_DEBUG */ - -/* - * Magic numbers used by Linux-IrDA. Random numbers which must be unique to - * give the best protection - */ - -#define IRTTY_MAGIC 0x2357 -#define LAP_MAGIC 0x1357 -#define LMP_MAGIC 0x4321 -#define LMP_LSAP_MAGIC 0x69333 -#define LMP_LAP_MAGIC 0x3432 -#define IRDA_DEVICE_MAGIC 0x63454 -#define IAS_MAGIC 0x007 -#define TTP_MAGIC 0x241169 -#define TTP_TSAP_MAGIC 0x4345 -#define IROBEX_MAGIC 0x341324 -#define HB_MAGIC 0x64534 -#define IRLAN_MAGIC 0x754 -#define IAS_OBJECT_MAGIC 0x34234 -#define IAS_ATTRIB_MAGIC 0x45232 -#define IRDA_TASK_MAGIC 0x38423 - -#define IAS_DEVICE_ID 0x0000 /* Defined by IrDA, IrLMP section 4.1 (page 68) */ -#define IAS_PNP_ID 0xd342 -#define IAS_OBEX_ID 0x34323 -#define IAS_IRLAN_ID 0x34234 -#define IAS_IRCOMM_ID 0x2343 -#define IAS_IRLPT_ID 0x9876 - -struct net_device; -struct packet_type; - -void irda_proc_register(void); -void irda_proc_unregister(void); - -int irda_sysctl_register(void); -void irda_sysctl_unregister(void); - -int irsock_init(void); -void irsock_cleanup(void); - -int irda_nl_register(void); -void irda_nl_unregister(void); - -int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev); - -#endif /* NET_IRDA_H */ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h deleted file mode 100644 index 664bf8178412..000000000000 --- a/include/net/irda/irda_device.h +++ /dev/null @@ -1,285 +0,0 @@ -/********************************************************************* - * - * Filename: irda_device.h - * Version: 0.9 - * Description: Contains various declarations used by the drivers - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Apr 14 12:41:42 1998 - * Modified at: Mon Mar 20 09:08:57 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 1998 Thomas Davis, , - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -/* - * This header contains all the IrDA definitions a driver really - * needs, and therefore the driver should not need to include - * any other IrDA headers - Jean II - */ - -#ifndef IRDA_DEVICE_H -#define IRDA_DEVICE_H - -#include -#include -#include -#include /* struct sk_buff */ -#include -#include - -#include -#include -#include /* struct qos_info */ -#include /* irda_queue_t */ - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; - -/* Some non-standard interface flags (should not conflict with any in if.h) */ -#define IFF_SIR 0x0001 /* Supports SIR speeds */ -#define IFF_MIR 0x0002 /* Supports MIR speeds */ -#define IFF_FIR 0x0004 /* Supports FIR speeds */ -#define IFF_VFIR 0x0008 /* Supports VFIR speeds */ -#define IFF_PIO 0x0010 /* Supports PIO transfer of data */ -#define IFF_DMA 0x0020 /* Supports DMA transfer of data */ -#define IFF_SHM 0x0040 /* Supports shared memory data transfers */ -#define IFF_DONGLE 0x0080 /* Interface has a dongle attached */ -#define IFF_AIR 0x0100 /* Supports Advanced IR (AIR) standards */ - -#define IO_XMIT 0x01 -#define IO_RECV 0x02 - -typedef enum { - IRDA_IRLAP, /* IrDA mode, and deliver to IrLAP */ - IRDA_RAW, /* IrDA mode */ - SHARP_ASK, - TV_REMOTE, /* Also known as Consumer Electronics IR */ -} INFRARED_MODE; - -typedef enum { - IRDA_TASK_INIT, /* All tasks are initialized with this state */ - IRDA_TASK_DONE, /* Signals that the task is finished */ - IRDA_TASK_WAIT, - IRDA_TASK_WAIT1, - IRDA_TASK_WAIT2, - IRDA_TASK_WAIT3, - IRDA_TASK_CHILD_INIT, /* Initializing child task */ - IRDA_TASK_CHILD_WAIT, /* Waiting for child task to finish */ - IRDA_TASK_CHILD_DONE /* Child task is finished */ -} IRDA_TASK_STATE; - -struct irda_task; -typedef int (*IRDA_TASK_CALLBACK) (struct irda_task *task); - -struct irda_task { - irda_queue_t q; - magic_t magic; - - IRDA_TASK_STATE state; - IRDA_TASK_CALLBACK function; - IRDA_TASK_CALLBACK finished; - - struct irda_task *parent; - struct timer_list timer; - - void *instance; /* Instance being called */ - void *param; /* Parameter to be used by instance */ -}; - -/* Dongle info */ -struct dongle_reg; -typedef struct { - struct dongle_reg *issue; /* Registration info */ - struct net_device *dev; /* Device we are attached to */ - struct irda_task *speed_task; /* Task handling speed change */ - struct irda_task *reset_task; /* Task handling reset */ - __u32 speed; /* Current speed */ - - /* Callbacks to the IrDA device driver */ - int (*set_mode)(struct net_device *, int mode); - int (*read)(struct net_device *dev, __u8 *buf, int len); - int (*write)(struct net_device *dev, __u8 *buf, int len); - int (*set_dtr_rts)(struct net_device *dev, int dtr, int rts); -} dongle_t; - -/* Dongle registration info */ -struct dongle_reg { - irda_queue_t q; /* Must be first */ - IRDA_DONGLE type; - - void (*open)(dongle_t *dongle, struct qos_info *qos); - void (*close)(dongle_t *dongle); - int (*reset)(struct irda_task *task); - int (*change_speed)(struct irda_task *task); - struct module *owner; -}; - -/* - * Per-packet information we need to hide inside sk_buff - * (must not exceed 48 bytes, check with struct sk_buff) - * The default_qdisc_pad field is a temporary hack. - */ -struct irda_skb_cb { - unsigned int default_qdisc_pad; - magic_t magic; /* Be sure that we can trust the information */ - __u32 next_speed; /* The Speed to be set *after* this frame */ - __u16 mtt; /* Minimum turn around time */ - __u16 xbofs; /* Number of xbofs required, used by SIR mode */ - __u16 next_xbofs; /* Number of xbofs required *after* this frame */ - void *context; /* May be used by drivers */ - void (*destructor)(struct sk_buff *skb); /* Used for flow control */ - __u16 xbofs_delay; /* Number of xbofs used for generating the mtt */ - __u8 line; /* Used by IrCOMM in IrLPT mode */ -}; - -/* Chip specific info */ -typedef struct { - int cfg_base; /* Config register IO base */ - int sir_base; /* SIR IO base */ - int fir_base; /* FIR IO base */ - int mem_base; /* Shared memory base */ - int sir_ext; /* Length of SIR iobase */ - int fir_ext; /* Length of FIR iobase */ - int irq, irq2; /* Interrupts used */ - int dma, dma2; /* DMA channel(s) used */ - int fifo_size; /* FIFO size */ - int irqflags; /* interrupt flags (ie, IRQF_SHARED) */ - int direction; /* Link direction, used by some FIR drivers */ - int enabled; /* Powered on? */ - int suspended; /* Suspended by APM */ - __u32 speed; /* Currently used speed */ - __u32 new_speed; /* Speed we must change to when Tx is finished */ - int dongle_id; /* Dongle or transceiver currently used */ -} chipio_t; - -/* IO buffer specific info (inspired by struct sk_buff) */ -typedef struct { - int state; /* Receiving state (transmit state not used) */ - int in_frame; /* True if receiving frame */ - - __u8 *head; /* start of buffer */ - __u8 *data; /* start of data in buffer */ - - int len; /* current length of data */ - int truesize; /* total allocated size of buffer */ - __u16 fcs; - - struct sk_buff *skb; /* ZeroCopy Rx in async_unwrap_char() */ -} iobuff_t; - -/* Maximum SIR frame (skb) that we expect to receive *unwrapped*. - * Max LAP MTU (I field) is 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40). - * Max LAP header is 2 bytes (for now). - * Max CRC is 2 bytes at SIR, 4 bytes at FIR. - * Need 1 byte for skb_reserve() to align IP header for IrLAN. - * Add a few extra bytes just to be safe (buffer is power of two anyway) - * Jean II */ -#define IRDA_SKB_MAX_MTU 2064 -/* Maximum SIR frame that we expect to send, wrapped (i.e. with XBOFS - * and escaped characters on top of above). */ -#define IRDA_SIR_MAX_FRAME 4269 - -/* The SIR unwrapper async_unwrap_char() will use a Rx-copy-break mechanism - * when using the optional ZeroCopy Rx, where only small frames are memcpy - * to a smaller skb to save memory. This is the threshold under which copy - * will happen (and over which it won't happen). - * Some FIR drivers may use this #define as well... - * This is the same value as various Ethernet drivers. - Jean II */ -#define IRDA_RX_COPY_THRESHOLD 256 - -/* Function prototypes */ -int irda_device_init(void); -void irda_device_cleanup(void); - -/* IrLAP entry points used by the drivers. - * We declare them here to avoid the driver pulling a whole bunch stack - * headers they don't really need - Jean II */ -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -/* Interface to be uses by IrLAP */ -void irda_device_set_media_busy(struct net_device *dev, int status); -int irda_device_is_media_busy(struct net_device *dev); -int irda_device_is_receiving(struct net_device *dev); - -/* Interface for internal use */ -static inline int irda_device_txqueue_empty(const struct net_device *dev) -{ - return qdisc_all_tx_empty(dev); -} -int irda_device_set_raw_mode(struct net_device* self, int status); -struct net_device *alloc_irdadev(int sizeof_priv); - -void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode); - -/* - * Function irda_get_mtt (skb) - * - * Utility function for getting the minimum turnaround time out of - * the skb, where it has been hidden in the cb field. - */ -static inline __u16 irda_get_mtt(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->mtt : 10000; -} - -/* - * Function irda_get_next_speed (skb) - * - * Extract the speed that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u32 irda_get_next_speed(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_speed : -1; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set for this frame from the skb - * - * Note : default to 10 for user space frames - */ -static inline __u16 irda_get_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->xbofs : 10; -} - -/* - * Function irda_get_next_xbofs (skb) - * - * Extract the xbofs that should be set *after* this frame from the skb - * - * Note : return -1 for user space frames - */ -static inline __u16 irda_get_next_xbofs(const struct sk_buff *skb) -{ - const struct irda_skb_cb *cb = (const struct irda_skb_cb *) skb->cb; - return (cb->magic == LAP_MAGIC) ? cb->next_xbofs : -1; -} -#endif /* IRDA_DEVICE_H */ - - diff --git a/include/net/irda/iriap.h b/include/net/irda/iriap.h deleted file mode 100644 index fcc896491a95..000000000000 --- a/include/net/irda/iriap.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: iriap.h - * Version: 0.5 - * Description: Information Access Protocol (IAP) - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Aug 21 00:02:07 1997 - * Modified at: Sat Dec 25 16:42:09 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_H -#define IRIAP_H - -#include -#include - -#include -#include -#include /* irda_queue_t */ -#include /* struct timer_list */ - -#define IAP_LST 0x80 -#define IAP_ACK 0x40 - -#define IAS_SERVER 0 -#define IAS_CLIENT 1 - -/* IrIAP Op-codes */ -#define GET_INFO_BASE 0x01 -#define GET_OBJECTS 0x02 -#define GET_VALUE 0x03 -#define GET_VALUE_BY_CLASS 0x04 -#define GET_OBJECT_INFO 0x05 -#define GET_ATTRIB_NAMES 0x06 - -#define IAS_SUCCESS 0 -#define IAS_CLASS_UNKNOWN 1 -#define IAS_ATTRIB_UNKNOWN 2 -#define IAS_DISCONNECT 10 - -typedef void (*CONFIRM_CALLBACK)(int result, __u16 obj_id, - struct ias_value *value, void *priv); - -struct iriap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Magic cookie */ - - int mode; /* Client or server */ - - __u32 saddr; - __u32 daddr; - __u8 operation; - - struct sk_buff *request_skb; - struct lsap_cb *lsap; - __u8 slsap_sel; - - /* Client states */ - IRIAP_STATE client_state; - IRIAP_STATE call_state; - - /* Server states */ - IRIAP_STATE server_state; - IRIAP_STATE r_connect_state; - - CONFIRM_CALLBACK confirm; - void *priv; /* Used to identify client */ - - __u8 max_header_size; - __u32 max_data_size; - - struct timer_list watchdog_timer; -}; - -int iriap_init(void); -void iriap_cleanup(void); - -struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, - CONFIRM_CALLBACK callback); -void iriap_close(struct iriap_cb *self); - -int iriap_getvaluebyclass_request(struct iriap_cb *self, - __u32 saddr, __u32 daddr, - char *name, char *attr); -void iriap_connect_request(struct iriap_cb *self); -void iriap_send_ack( struct iriap_cb *self); -void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb); - -void iriap_register_server(void); - -#endif - - diff --git a/include/net/irda/iriap_event.h b/include/net/irda/iriap_event.h deleted file mode 100644 index 89747f06d9eb..000000000000 --- a/include/net/irda/iriap_event.h +++ /dev/null @@ -1,85 +0,0 @@ -/********************************************************************* - * - * Filename: iriap_event.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Sun Oct 31 22:02:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRIAP_FSM_H -#define IRIAP_FSM_H - -/* Forward because of circular include dependecies */ -struct iriap_cb; - -/* IrIAP states */ -typedef enum { - /* Client */ - S_DISCONNECT, - S_CONNECTING, - S_CALL, - - /* S-Call */ - S_MAKE_CALL, - S_CALLING, - S_OUTSTANDING, - S_REPLYING, - S_WAIT_FOR_CALL, - S_WAIT_ACTIVE, - - /* Server */ - R_DISCONNECT, - R_CALL, - - /* R-Connect */ - R_WAITING, - R_WAIT_ACTIVE, - R_RECEIVING, - R_EXECUTE, - R_RETURNING, -} IRIAP_STATE; - -typedef enum { - IAP_CALL_REQUEST, - IAP_CALL_REQUEST_GVBC, - IAP_CALL_RESPONSE, - IAP_RECV_F_LST, - IAP_LM_DISCONNECT_INDICATION, - IAP_LM_CONNECT_INDICATION, - IAP_LM_CONNECT_CONFIRM, -} IRIAP_EVENT; - -void iriap_next_client_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_call_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_server_state (struct iriap_cb *self, IRIAP_STATE state); -void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state); - - -void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_call_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -void iriap_do_server_event (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -#endif /* IRIAP_FSM_H */ - diff --git a/include/net/irda/irias_object.h b/include/net/irda/irias_object.h deleted file mode 100644 index 83f78081799c..000000000000 --- a/include/net/irda/irias_object.h +++ /dev/null @@ -1,108 +0,0 @@ -/********************************************************************* - * - * Filename: irias_object.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 1 22:49:50 1998 - * Modified at: Wed Dec 15 11:20:57 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef LM_IAS_OBJECT_H -#define LM_IAS_OBJECT_H - -#include -#include - -/* LM-IAS Attribute types */ -#define IAS_MISSING 0 -#define IAS_INTEGER 1 -#define IAS_OCT_SEQ 2 -#define IAS_STRING 3 - -/* Object ownership of attributes (user or kernel) */ -#define IAS_KERNEL_ATTR 0 -#define IAS_USER_ATTR 1 - -/* - * LM-IAS Object - */ -struct ias_object { - irda_queue_t q; /* Must be first! */ - magic_t magic; - - char *name; - int id; - hashbin_t *attribs; -}; - -/* - * Values used by LM-IAS attributes - */ -struct ias_value { - __u8 type; /* Value description */ - __u8 owner; /* Managed from user/kernel space */ - int charset; /* Only used by string type */ - int len; - - /* Value */ - union { - int integer; - char *string; - __u8 *oct_seq; - } t; -}; - -/* - * Attributes used by LM-IAS objects - */ -struct ias_attrib { - irda_queue_t q; /* Must be first! */ - int magic; - - char *name; /* Attribute name */ - struct ias_value *value; /* Attribute value */ -}; - -struct ias_object *irias_new_object(char *name, int id); -void irias_insert_object(struct ias_object *obj); -int irias_delete_object(struct ias_object *obj); -int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib, - int cleanobject); -void __irias_delete_object(struct ias_object *obj); - -void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, - int user); -void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, - int user); -void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, - int len, int user); -int irias_object_change_attribute(char *obj_name, char *attrib_name, - struct ias_value *new_value); -struct ias_object *irias_find_object(char *name); -struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name); - -struct ias_value *irias_new_string_value(char *string); -struct ias_value *irias_new_integer_value(int integer); -struct ias_value *irias_new_octseq_value(__u8 *octseq , int len); -struct ias_value *irias_new_missing_value(void); -void irias_delete_value(struct ias_value *value); - -extern struct ias_value irias_missing; -extern hashbin_t *irias_objects; - -#endif diff --git a/include/net/irda/irlan_client.h b/include/net/irda/irlan_client.h deleted file mode 100644 index fa8455eda280..000000000000 --- a/include/net/irda/irlan_client.h +++ /dev/null @@ -1,42 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_client.h - * Version: 0.3 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Thu Apr 22 14:13:34 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_CLIENT_H -#define IRLAN_CLIENT_H - -#include -#include -#include -#include - -#include -#include - -void irlan_client_discovery_indication(discinfo_t *, DISCOVERY_MODE, void *); -void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr); - -void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb); -void irlan_client_get_value_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv); -#endif diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h deleted file mode 100644 index 550c2d6ec7ff..000000000000 --- a/include/net/irda/irlan_common.h +++ /dev/null @@ -1,230 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_common.h - * Version: 0.8 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun Oct 31 19:41:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_H -#define IRLAN_H - -#include /* for HZ */ - -#include -#include -#include -#include -#include - -#include - -#define IRLAN_MTU 1518 -#define IRLAN_TIMEOUT 10*HZ /* 10 seconds */ - -/* Command packet types */ -#define CMD_GET_PROVIDER_INFO 0 -#define CMD_GET_MEDIA_CHAR 1 -#define CMD_OPEN_DATA_CHANNEL 2 -#define CMD_CLOSE_DATA_CHAN 3 -#define CMD_RECONNECT_DATA_CHAN 4 -#define CMD_FILTER_OPERATION 5 - -/* Some responses */ -#define RSP_SUCCESS 0 -#define RSP_INSUFFICIENT_RESOURCES 1 -#define RSP_INVALID_COMMAND_FORMAT 2 -#define RSP_COMMAND_NOT_SUPPORTED 3 -#define RSP_PARAM_NOT_SUPPORTED 4 -#define RSP_VALUE_NOT_SUPPORTED 5 -#define RSP_NOT_OPEN 6 -#define RSP_AUTHENTICATION_REQUIRED 7 -#define RSP_INVALID_PASSWORD 8 -#define RSP_PROTOCOL_ERROR 9 -#define RSP_ASYNCHRONOUS_ERROR 255 - -/* Media types */ -#define MEDIA_802_3 1 -#define MEDIA_802_5 2 - -/* Filter parameters */ -#define DATA_CHAN 1 -#define FILTER_TYPE 2 -#define FILTER_MODE 3 - -/* Filter types */ -#define IRLAN_DIRECTED 0x01 -#define IRLAN_FUNCTIONAL 0x02 -#define IRLAN_GROUP 0x04 -#define IRLAN_MAC_FRAME 0x08 -#define IRLAN_MULTICAST 0x10 -#define IRLAN_BROADCAST 0x20 -#define IRLAN_IPX_SOCKET 0x40 - -/* Filter modes */ -#define ALL 1 -#define FILTER 2 -#define NONE 3 - -/* Filter operations */ -#define GET 1 -#define CLEAR 2 -#define ADD 3 -#define REMOVE 4 -#define DYNAMIC 5 - -/* Access types */ -#define ACCESS_DIRECT 1 -#define ACCESS_PEER 2 -#define ACCESS_HOSTED 3 - -#define IRLAN_BYTE 0 -#define IRLAN_SHORT 1 -#define IRLAN_ARRAY 2 - -/* IrLAN sits on top if IrTTP */ -#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER) -/* 1 byte for the command code and 1 byte for the parameter count */ -#define IRLAN_CMD_HEADER 2 - -#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \ - + strlen ((value))) -#define IRLAN_BYTE_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 1) -#define IRLAN_SHORT_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 2) - -/* - * IrLAN client - */ -struct irlan_client_cb { - int state; - - int open_retries; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - int access_type; /* Access type of provider */ - __u8 reconnect_key[255]; - __u8 key_len; - - __u16 recv_arb_val; - __u16 max_frame; - int filter_type; - - int unicast_open; - int broadcast_open; - - int tx_busy; - struct sk_buff_head txq; /* Transmit control queue */ - - struct iriap_cb *iriap; - - struct timer_list kick_timer; -}; - -/* - * IrLAN provider - */ -struct irlan_provider_cb { - int state; - - struct tsap_cb *tsap_ctrl; - __u32 max_sdu_size; - __u8 max_header_size; - - /* - * Store some values here which are used by the provider to parse - * the filter operations - */ - int data_chan; - int filter_type; - int filter_mode; - int filter_operation; - int filter_entry; - int access_type; /* Access type */ - __u16 send_arb_val; - - __u8 mac_address[ETH_ALEN]; /* Generated MAC address for peer device */ -}; - -/* - * IrLAN control block - */ -struct irlan_cb { - int magic; - struct list_head dev_list; - struct net_device *dev; /* Ethernet device structure*/ - - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - int disconnect_reason; /* Why we got disconnected */ - - int media; /* Media type */ - __u8 version[2]; /* IrLAN version */ - - struct tsap_cb *tsap_data; /* Data TSAP */ - - int use_udata; /* Use Unit Data transfers */ - - __u8 stsap_sel_data; /* Source data TSAP selector */ - __u8 dtsap_sel_data; /* Destination data TSAP selector */ - __u8 dtsap_sel_ctrl; /* Destination ctrl TSAP selector */ - - struct irlan_client_cb client; /* Client specific fields */ - struct irlan_provider_cb provider; /* Provider specific fields */ - - __u32 max_sdu_size; - __u8 max_header_size; - - wait_queue_head_t open_wait; - struct timer_list watchdog_timer; -}; - -void irlan_close(struct irlan_cb *self); -void irlan_close_tsaps(struct irlan_cb *self); - -int irlan_register_netdev(struct irlan_cb *self); -void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel); -void irlan_start_watchdog_timer(struct irlan_cb *self, int timeout); - -void irlan_open_data_tsap(struct irlan_cb *self); - -int irlan_run_ctrl_tx_queue(struct irlan_cb *self); - -struct irlan_cb *irlan_get_any(void); -void irlan_get_provider_info(struct irlan_cb *self); -void irlan_get_media_char(struct irlan_cb *self); -void irlan_open_data_channel(struct irlan_cb *self); -void irlan_close_data_channel(struct irlan_cb *self); -void irlan_set_multicast_filter(struct irlan_cb *self, int status); -void irlan_set_broadcast_filter(struct irlan_cb *self, int status); - -int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value); -int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value); -int irlan_insert_string_param(struct sk_buff *skb, char *param, char *value); -int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *value, - __u16 value_len); - -int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len); - -#endif - - diff --git a/include/net/irda/irlan_eth.h b/include/net/irda/irlan_eth.h deleted file mode 100644 index de5c81691f33..000000000000 --- a/include/net/irda/irlan_eth.h +++ /dev/null @@ -1,32 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_eth.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Thu Oct 15 08:36:58 1998 - * Modified at: Fri May 14 23:29:00 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_ETH_H -#define IRLAN_ETH_H - -struct net_device *alloc_irlandev(const char *name); -int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb); - -void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow); -#endif diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h deleted file mode 100644 index 018b5a77e610..000000000000 --- a/include/net/irda/irlan_event.h +++ /dev/null @@ -1,81 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_event.h - * Version: - * Description: LAN access - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Tue Feb 2 09:45:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_EVENT_H -#define IRLAN_EVENT_H - -#include -#include - -#include - -typedef enum { - IRLAN_IDLE, - IRLAN_QUERY, - IRLAN_CONN, - IRLAN_INFO, - IRLAN_MEDIA, - IRLAN_OPEN, - IRLAN_WAIT, - IRLAN_ARB, - IRLAN_DATA, - IRLAN_CLOSE, - IRLAN_SYNC -} IRLAN_STATE; - -typedef enum { - IRLAN_DISCOVERY_INDICATION, - IRLAN_IAS_PROVIDER_AVAIL, - IRLAN_IAS_PROVIDER_NOT_AVAIL, - IRLAN_LAP_DISCONNECT, - IRLAN_LMP_DISCONNECT, - IRLAN_CONNECT_COMPLETE, - IRLAN_DATA_INDICATION, - IRLAN_DATA_CONNECT_INDICATION, - IRLAN_RETRY_CONNECT, - - IRLAN_CONNECT_INDICATION, - IRLAN_GET_INFO_CMD, - IRLAN_GET_MEDIA_CMD, - IRLAN_OPEN_DATA_CMD, - IRLAN_FILTER_CONFIG_CMD, - - IRLAN_CHECK_CON_ARB, - IRLAN_PROVIDER_SIGNAL, - - IRLAN_WATCHDOG_TIMEOUT, -} IRLAN_EVENT; - -extern const char * const irlan_state[]; - -void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state); -void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state); - -#endif diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h deleted file mode 100644 index a5a2539485bd..000000000000 --- a/include/net/irda/irlan_filter.h +++ /dev/null @@ -1,35 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_filter.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Jan 29 15:24:08 1999 - * Modified at: Sun Feb 7 23:35:31 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_FILTER_H -#define IRLAN_FILTER_H - -void irlan_check_command_param(struct irlan_cb *self, char *param, - char *value); -void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); -#ifdef CONFIG_PROC_FS -void irlan_print_filter(struct seq_file *seq, int filter_type); -#endif - -#endif /* IRLAN_FILTER_H */ diff --git a/include/net/irda/irlan_provider.h b/include/net/irda/irlan_provider.h deleted file mode 100644 index 92f3b0e1029b..000000000000 --- a/include/net/irda/irlan_provider.h +++ /dev/null @@ -1,52 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_provider.h - * Version: 0.1 - * Description: IrDA LAN access layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun May 9 12:26:11 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAN_SERVER_H -#define IRLAN_SERVER_H - -#include -#include -#include -#include - -#include - -void irlan_provider_ctrl_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); - - -void irlan_provider_connect_response(struct irlan_cb *, struct tsap_cb *); - -int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb); -int irlan_provider_parse_command(struct irlan_cb *self, int cmd, - struct sk_buff *skb); - -void irlan_provider_send_reply(struct irlan_cb *self, int command, - int ret_code); -int irlan_provider_open_ctrl_tsap(struct irlan_cb *self); - -#endif - - diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h deleted file mode 100644 index 6f23e820618c..000000000000 --- a/include/net/irda/irlap.h +++ /dev/null @@ -1,311 +0,0 @@ -/********************************************************************* - * - * Filename: irlap.h - * Version: 0.8 - * Description: An IrDA LAP driver for Linux - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Fri Dec 10 13:21:17 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLAP_H -#define IRLAP_H - -#include -#include -#include -#include - -#include /* irda_queue_t */ -#include /* struct qos_info */ -#include /* discovery_t */ -#include /* IRLAP_STATE, ... */ -#include /* struct notify_t */ - -#define CONFIG_IRDA_DYNAMIC_WINDOW 1 - -#define LAP_RELIABLE 1 -#define LAP_UNRELIABLE 0 - -#define LAP_ADDR_HEADER 1 /* IrLAP Address Header */ -#define LAP_CTRL_HEADER 1 /* IrLAP Control Header */ - -/* May be different when we get VFIR */ -#define LAP_MAX_HEADER (LAP_ADDR_HEADER + LAP_CTRL_HEADER) - -/* Each IrDA device gets a random 32 bits IRLAP device address */ -#define LAP_ALEN 4 - -#define BROADCAST 0xffffffff /* Broadcast device address */ -#define CBROADCAST 0xfe /* Connection broadcast address */ -#define XID_FORMAT 0x01 /* Discovery XID format */ - -/* Nobody seems to use this constant. */ -#define LAP_WINDOW_SIZE 8 -/* We keep the LAP queue very small to minimise the amount of buffering. - * this improve latency and reduce resource consumption. - * This work only because we have synchronous refilling of IrLAP through - * the flow control mechanism (via scheduler and IrTTP). - * 2 buffers is the minimum we can work with, one that we send while polling - * IrTTP, and another to know that we should not send the pf bit. - * Jean II */ -#define LAP_HIGH_THRESHOLD 2 -/* Some rare non TTP clients don't implement flow control, and - * so don't comply with the above limit (and neither with this one). - * For IAP and management, it doesn't matter, because they never transmit much. - *.For IrLPT, this should be fixed. - * - Jean II */ -#define LAP_MAX_QUEUE 10 -/* Please note that all IrDA management frames (LMP/TTP conn req/disc and - * IAS queries) fall in the second category and are sent to LAP even if TTP - * is stopped. This means that those frames will wait only a maximum of - * two (2) data frames before beeing sent on the "wire", which speed up - * new socket setup when the link is saturated. - * Same story for two sockets competing for the medium : if one saturates - * the LAP, when the other want to transmit it only has to wait for - * maximum three (3) packets (2 + one scheduling), which improve performance - * of delay sensitive applications. - * Jean II */ - -#define NR_EXPECTED 1 -#define NR_UNEXPECTED 0 -#define NR_INVALID -1 - -#define NS_EXPECTED 1 -#define NS_UNEXPECTED 0 -#define NS_INVALID -1 - -/* - * Meta information passed within the IrLAP state machine - */ -struct irlap_info { - __u8 caddr; /* Connection address */ - __u8 control; /* Frame type */ - __u8 cmd; - - __u32 saddr; - __u32 daddr; - - int pf; /* Poll/final bit set */ - - __u8 nr; /* Sequence number of next frame expected */ - __u8 ns; /* Sequence number of frame sent */ - - int S; /* Number of slots */ - int slot; /* Random chosen slot */ - int s; /* Current slot */ - - discovery_t *discovery; /* Discovery information */ -}; - -/* Main structure of IrLAP */ -struct irlap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; - - /* Device we are attached to */ - struct net_device *netdev; - char hw_name[2*IFNAMSIZ + 1]; - - /* Connection state */ - volatile IRLAP_STATE state; /* Current state */ - - /* Timers used by IrLAP */ - struct timer_list query_timer; - struct timer_list slot_timer; - struct timer_list discovery_timer; - struct timer_list final_timer; - struct timer_list poll_timer; - struct timer_list wd_timer; - struct timer_list backoff_timer; - - /* Media busy stuff */ - struct timer_list media_busy_timer; - int media_busy; - - /* Timeouts which will be different with different turn time */ - int slot_timeout; - int poll_timeout; - int final_timeout; - int wd_timeout; - - struct sk_buff_head txq; /* Frames to be transmitted */ - struct sk_buff_head txq_ultra; - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - int retry_count; /* Times tried to establish connection */ - int add_wait; /* True if we are waiting for frame */ - - __u8 connect_pending; - __u8 disconnect_pending; - - /* To send a faster RR if tx queue empty */ -#ifdef CONFIG_IRDA_FAST_RR - int fast_RR_timeout; - int fast_RR; -#endif /* CONFIG_IRDA_FAST_RR */ - - int N1; /* N1 * F-timer = Negitiated link disconnect warning threshold */ - int N2; /* N2 * F-timer = Negitiated link disconnect time */ - int N3; /* Connection retry count */ - - int local_busy; - int remote_busy; - int xmitflag; - - __u8 vs; /* Next frame to be sent */ - __u8 vr; /* Next frame to be received */ - __u8 va; /* Last frame acked */ - int window; /* Nr of I-frames allowed to send */ - int window_size; /* Current negotiated window size */ - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - __u32 line_capacity; /* Number of bytes allowed to send */ - __u32 bytes_left; /* Number of bytes still allowed to transmit */ -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - struct sk_buff_head wx_list; - - __u8 ack_required; - - /* XID parameters */ - __u8 S; /* Number of slots */ - __u8 slot; /* Random chosen slot */ - __u8 s; /* Current slot */ - int frame_sent; /* Have we sent reply? */ - - hashbin_t *discovery_log; - discovery_t *discovery_cmd; - - __u32 speed; /* Link speed */ - - struct qos_info qos_tx; /* QoS requested by peer */ - struct qos_info qos_rx; /* QoS requested by self */ - struct qos_info *qos_dev; /* QoS supported by device */ - - notify_t notify; /* Callbacks to IrLMP */ - - int mtt_required; /* Minimum turnaround time required */ - int xbofs_delay; /* Nr of XBOF's used to MTT */ - int bofs_count; /* Negotiated extra BOFs */ - int next_bofs; /* Negotiated extra BOFs after next frame */ - - int mode; /* IrLAP mode (primary, secondary or monitor) */ -}; - -/* - * Function prototypes - */ -int irlap_init(void); -void irlap_cleanup(void); - -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name); -void irlap_close(struct irlap_cb *self); - -void irlap_connect_request(struct irlap_cb *self, __u32 daddr, - struct qos_info *qos, int sniff); -void irlap_connect_response(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb); -void irlap_connect_confirm(struct irlap_cb *, struct sk_buff *skb); - -void irlap_data_indication(struct irlap_cb *, struct sk_buff *, int unreliable); -void irlap_data_request(struct irlap_cb *, struct sk_buff *, int unreliable); - -#ifdef CONFIG_IRDA_ULTRA -void irlap_unitdata_request(struct irlap_cb *, struct sk_buff *); -void irlap_unitdata_indication(struct irlap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlap_disconnect_request(struct irlap_cb *); -void irlap_disconnect_indication(struct irlap_cb *, LAP_REASON reason); - -void irlap_status_indication(struct irlap_cb *, int quality_of_link); - -void irlap_test_request(__u8 *info, int len); - -void irlap_discovery_request(struct irlap_cb *, discovery_t *discovery); -void irlap_discovery_confirm(struct irlap_cb *, hashbin_t *discovery_log); -void irlap_discovery_indication(struct irlap_cb *, discovery_t *discovery); - -void irlap_reset_indication(struct irlap_cb *self); -void irlap_reset_confirm(void); - -void irlap_update_nr_received(struct irlap_cb *, int nr); -int irlap_validate_nr_received(struct irlap_cb *, int nr); -int irlap_validate_ns_received(struct irlap_cb *, int ns); - -int irlap_generate_rand_time_slot(int S, int s); -void irlap_initiate_connection_state(struct irlap_cb *); -void irlap_flush_all_queues(struct irlap_cb *); -void irlap_wait_min_turn_around(struct irlap_cb *, struct qos_info *); - -void irlap_apply_default_connection_parameters(struct irlap_cb *self); -void irlap_apply_connection_parameters(struct irlap_cb *self, int now); - -#define IRLAP_GET_HEADER_SIZE(self) (LAP_MAX_HEADER) -#define IRLAP_GET_TX_QUEUE_LEN(self) skb_queue_len(&self->txq) - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irlap_is_primary(struct irlap_cb *self) -{ - int ret; - switch(self->state) { - case LAP_XMIT_P: - case LAP_NRM_P: - ret = 1; - break; - case LAP_XMIT_S: - case LAP_NRM_S: - ret = 0; - break; - default: - ret = -1; - } - return ret; -} - -/* Clear a pending IrLAP disconnect. - Jean II */ -static inline void irlap_clear_disconnect(struct irlap_cb *self) -{ - self->disconnect_pending = FALSE; -} - -/* - * Function irlap_next_state (self, state) - * - * Switches state and provides debug information - * - */ -static inline void irlap_next_state(struct irlap_cb *self, IRLAP_STATE state) -{ - /* - if (!self || self->magic != LAP_MAGIC) - return; - - pr_debug("next LAP state = %s\n", irlap_state[state]); - */ - self->state = state; -} - -#endif diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h deleted file mode 100644 index e4325fee1267..000000000000 --- a/include/net/irda/irlap_event.h +++ /dev/null @@ -1,129 +0,0 @@ -/********************************************************************* - * - * - * Filename: irlap_event.h - * Version: 0.1 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Tue Dec 21 11:20:30 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_EVENT_H -#define IRLAP_EVENT_H - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct irlap_info; - -/* IrLAP States */ -typedef enum { - LAP_NDM, /* Normal disconnected mode */ - LAP_QUERY, - LAP_REPLY, - LAP_CONN, /* Connect indication */ - LAP_SETUP, /* Setting up connection */ - LAP_OFFLINE, /* A really boring state */ - LAP_XMIT_P, - LAP_PCLOSE, - LAP_NRM_P, /* Normal response mode as primary */ - LAP_RESET_WAIT, - LAP_RESET, - LAP_NRM_S, /* Normal response mode as secondary */ - LAP_XMIT_S, - LAP_SCLOSE, - LAP_RESET_CHECK, -} IRLAP_STATE; - -/* IrLAP Events */ -typedef enum { - /* Services events */ - DISCOVERY_REQUEST, - CONNECT_REQUEST, - CONNECT_RESPONSE, - DISCONNECT_REQUEST, - DATA_REQUEST, - RESET_REQUEST, - RESET_RESPONSE, - - /* Send events */ - SEND_I_CMD, - SEND_UI_FRAME, - - /* Receive events */ - RECV_DISCOVERY_XID_CMD, - RECV_DISCOVERY_XID_RSP, - RECV_SNRM_CMD, - RECV_TEST_CMD, - RECV_TEST_RSP, - RECV_UA_RSP, - RECV_DM_RSP, - RECV_RD_RSP, - RECV_I_CMD, - RECV_I_RSP, - RECV_UI_FRAME, - RECV_FRMR_RSP, - RECV_RR_CMD, - RECV_RR_RSP, - RECV_RNR_CMD, - RECV_RNR_RSP, - RECV_REJ_CMD, - RECV_REJ_RSP, - RECV_SREJ_CMD, - RECV_SREJ_RSP, - RECV_DISC_CMD, - - /* Timer events */ - SLOT_TIMER_EXPIRED, - QUERY_TIMER_EXPIRED, - FINAL_TIMER_EXPIRED, - POLL_TIMER_EXPIRED, - DISCOVERY_TIMER_EXPIRED, - WD_TIMER_EXPIRED, - BACKOFF_TIMER_EXPIRED, - MEDIA_BUSY_TIMER_EXPIRED, -} IRLAP_EVENT; - -/* - * Disconnect reason code - */ -typedef enum { /* FIXME check the two first reason codes */ - LAP_DISC_INDICATION=1, /* Received a disconnect request from peer */ - LAP_NO_RESPONSE, /* To many retransmits without response */ - LAP_RESET_INDICATION, /* To many retransmits, or invalid nr/ns */ - LAP_FOUND_NONE, /* No devices were discovered */ - LAP_MEDIA_BUSY, - LAP_PRIMARY_CONFLICT, -} LAP_REASON; - -extern const char *const irlap_state[]; - -void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -void irlap_print_event(IRLAP_EVENT event); - -int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h deleted file mode 100644 index cbc12a926e5f..000000000000 --- a/include/net/irda/irlap_frame.h +++ /dev/null @@ -1,167 +0,0 @@ -/********************************************************************* - * - * Filename: irlap_frame.h - * Version: 0.9 - * Description: IrLAP frame declarations - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 10:27:26 1997 - * Modified at: Sat Dec 25 21:07:26 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRLAP_FRAME_H -#define IRLAP_FRAME_H - -#include - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlap_cb; -struct discovery_t; - -/* Frame types and templates */ -#define INVALID 0xff - -/* Unnumbered (U) commands */ -#define SNRM_CMD 0x83 /* Set Normal Response Mode */ -#define DISC_CMD 0x43 /* Disconnect */ -#define XID_CMD 0x2f /* Exchange Station Identification */ -#define TEST_CMD 0xe3 /* Test */ - -/* Unnumbered responses */ -#define RNRM_RSP 0x83 /* Request Normal Response Mode */ -#define UA_RSP 0x63 /* Unnumbered Acknowledgement */ -#define FRMR_RSP 0x87 /* Frame Reject */ -#define DM_RSP 0x0f /* Disconnect Mode */ -#define RD_RSP 0x43 /* Request Disconnection */ -#define XID_RSP 0xaf /* Exchange Station Identification */ -#define TEST_RSP 0xe3 /* Test frame */ - -/* Supervisory (S) */ -#define RR 0x01 /* Receive Ready */ -#define REJ 0x09 /* Reject */ -#define RNR 0x05 /* Receive Not Ready */ -#define SREJ 0x0d /* Selective Reject */ - -/* Information (I) */ -#define I_FRAME 0x00 /* Information Format */ -#define UI_FRAME 0x03 /* Unnumbered Information */ - -#define CMD_FRAME 0x01 -#define RSP_FRAME 0x00 - -#define PF_BIT 0x10 /* Poll/final bit */ - -/* Some IrLAP field lengths */ -/* - * Only baud rate triplet is 4 bytes (PV can be 2 bytes). - * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes. - */ -#define IRLAP_NEGOCIATION_PARAMS_LEN 25 -#define IRLAP_DISCOVERY_INFO_LEN 32 - -struct disc_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct xid_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __u8 ident; /* Should always be XID_FORMAT */ - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ - __u8 flags; /* Discovery flags */ - __u8 slotnr; - __u8 version; -} __packed; - -struct test_frame { - __u8 caddr; /* Connection address */ - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Destination device address */ -} __packed; - -struct ua_frame { - __u8 caddr; - __u8 control; - __le32 saddr; /* Source device address */ - __le32 daddr; /* Dest device address */ -} __packed; - -struct dm_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rd_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct rr_frame { - __u8 caddr; /* Connection address */ - __u8 control; -} __packed; - -struct i_frame { - __u8 caddr; - __u8 control; -} __packed; - -struct snrm_frame { - __u8 caddr; - __u8 control; - __le32 saddr; - __le32 daddr; - __u8 ncaddr; -} __packed; - -void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb); -void irlap_send_discovery_xid_frame(struct irlap_cb *, int S, __u8 s, - __u8 command, - struct discovery_t *discovery); -void irlap_send_snrm_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, - struct sk_buff *cmd); -void irlap_send_ua_response_frame(struct irlap_cb *, struct qos_info *); -void irlap_send_dm_frame(struct irlap_cb *self); -void irlap_send_rd_frame(struct irlap_cb *self); -void irlap_send_disc_frame(struct irlap_cb *self); -void irlap_send_rr_frame(struct irlap_cb *self, int command); - -void irlap_send_data_primary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_primary_poll(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary(struct irlap_cb *, struct sk_buff *); -void irlap_send_data_secondary_final(struct irlap_cb *, struct sk_buff *); -void irlap_resend_rejected_frames(struct irlap_cb *, int command); -void irlap_resend_rejected_frame(struct irlap_cb *self, int command); - -void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, - __u8 caddr, int command); - -int irlap_insert_qos_negotiation_params(struct irlap_cb *self, - struct sk_buff *skb); - -#endif diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h deleted file mode 100644 index f132924cc9da..000000000000 --- a/include/net/irda/irlmp.h +++ /dev/null @@ -1,295 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp.h - * Version: 0.9 - * Description: IrDA Link Management Protocol (LMP) layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 17 20:54:32 1997 - * Modified at: Fri Dec 10 13:23:01 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_H -#define IRLMP_H - -#include /* for HZ */ - -#include - -#include -#include -#include /* LAP_MAX_HEADER, ... */ -#include -#include -#include - -/* LSAP-SEL's */ -#define LSAP_MASK 0x7f -#define LSAP_IAS 0x00 -#define LSAP_ANY 0xff -#define LSAP_MAX 0x6f /* 0x70-0x7f are reserved */ -#define LSAP_CONNLESS 0x70 /* Connectionless LSAP, mostly used for Ultra */ - -#define DEV_ADDR_ANY 0xffffffff - -#define LMP_HEADER 2 /* Dest LSAP + Source LSAP */ -#define LMP_CONTROL_HEADER 4 /* LMP_HEADER + opcode + parameter */ -#define LMP_PID_HEADER 1 /* Used by Ultra */ -#define LMP_MAX_HEADER (LMP_CONTROL_HEADER+LAP_MAX_HEADER) - -#define LM_MAX_CONNECTIONS 10 - -#define LM_IDLE_TIMEOUT 2*HZ /* 2 seconds for now */ - -typedef enum { - S_PNP = 0, - S_PDA, - S_COMPUTER, - S_PRINTER, - S_MODEM, - S_FAX, - S_LAN, - S_TELEPHONY, - S_COMM, - S_OBEX, - S_ANY, - S_END, -} SERVICE; - -/* For selective discovery */ -typedef void (*DISCOVERY_CALLBACK1) (discinfo_t *, DISCOVERY_MODE, void *); -/* For expiry (the same) */ -typedef void (*DISCOVERY_CALLBACK2) (discinfo_t *, DISCOVERY_MODE, void *); - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hints; /* Hint bits */ -} irlmp_service_t; - -typedef struct { - irda_queue_t queue; /* Must be first */ - - __u16_host_order hint_mask; - - DISCOVERY_CALLBACK1 disco_callback; /* Selective discovery */ - DISCOVERY_CALLBACK2 expir_callback; /* Selective expiration */ - void *priv; /* Used to identify client */ -} irlmp_client_t; - -/* - * Information about each logical LSAP connection - */ -struct lsap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - unsigned long connected; /* set_bit used on this */ - int persistent; - - __u8 slsap_sel; /* Source (this) LSAP address */ - __u8 dlsap_sel; /* Destination LSAP address (if connected) */ -#ifdef CONFIG_IRDA_ULTRA - __u8 pid; /* Used by connectionless LSAP */ -#endif /* CONFIG_IRDA_ULTRA */ - struct sk_buff *conn_skb; /* Store skb here while connecting */ - - struct timer_list watchdog_timer; - - LSAP_STATE lsap_state; /* Connection state */ - notify_t notify; /* Indication/Confirm entry points */ - struct qos_info qos; /* QoS for this connection */ - - struct lap_cb *lap; /* Pointer to LAP connection structure */ -}; - -/* - * Used for caching the last slsap->dlsap->handle mapping - * - * We don't need to keep/match the remote address in the cache because - * we are associated with a specific LAP (which implies it). - * Jean II - */ -typedef struct { - int valid; - - __u8 slsap_sel; - __u8 dlsap_sel; - struct lsap_cb *lsap; -} CACHE_ENTRY; - -/* - * Information about each registered IrLAP layer - */ -struct lap_cb { - irda_queue_t queue; /* Must be first */ - magic_t magic; - - int reason; /* LAP disconnect reason */ - - IRLMP_STATE lap_state; - - struct irlap_cb *irlap; /* Instance of IrLAP layer */ - hashbin_t *lsaps; /* LSAP associated with this link */ - struct lsap_cb *flow_next; /* Next lsap to be polled for Tx */ - - __u8 caddr; /* Connection address */ - __u32 saddr; /* Source device address */ - __u32 daddr; /* Destination device address */ - - struct qos_info *qos; /* LAP QoS for this session */ - struct timer_list idle_timer; - -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - /* The lsap cache was moved from struct irlmp_cb to here because - * it must be associated with the specific LAP. Also, this - * improves performance. - Jean II */ - CACHE_ENTRY cache; /* Caching last slsap->dlsap->handle mapping */ -#endif -}; - -/* - * Main structure for IrLMP - */ -struct irlmp_cb { - magic_t magic; - - __u8 conflict_flag; - - discovery_t discovery_cmd; /* Discovery command to use by IrLAP */ - discovery_t discovery_rsp; /* Discovery response to use by IrLAP */ - - /* Last lsap picked automatically by irlmp_find_free_slsap() */ - int last_lsap_sel; - - struct timer_list discovery_timer; - - hashbin_t *links; /* IrLAP connection table */ - hashbin_t *unconnected_lsaps; - hashbin_t *clients; - hashbin_t *services; - - hashbin_t *cachelog; /* Current discovery log */ - - int running; - - __u16_host_order hints; /* Hint bits */ -}; - -/* Prototype declarations */ -int irlmp_init(void); -void irlmp_cleanup(void); -struct lsap_cb *irlmp_open_lsap(__u8 slsap, notify_t *notify, __u8 pid); -void irlmp_close_lsap( struct lsap_cb *self); - -__u16 irlmp_service_to_hint(int service); -void *irlmp_register_service(__u16 hints); -int irlmp_unregister_service(void *handle); -void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); -int irlmp_unregister_client(void *handle); -int irlmp_update_client(void *handle, __u16 hint_mask, - DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv); - -void irlmp_register_link(struct irlap_cb *, __u32 saddr, notify_t *); -void irlmp_unregister_link(__u32 saddr); - -int irlmp_connect_request(struct lsap_cb *, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *, struct sk_buff *); -void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb); -int irlmp_connect_response(struct lsap_cb *, struct sk_buff *); -void irlmp_connect_confirm(struct lsap_cb *, struct sk_buff *); -struct lsap_cb *irlmp_dup(struct lsap_cb *self, void *instance); - -void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, - struct sk_buff *userdata); -int irlmp_disconnect_request(struct lsap_cb *, struct sk_buff *userdata); - -void irlmp_discovery_confirm(hashbin_t *discovery_log, DISCOVERY_MODE mode); -void irlmp_discovery_request(int nslots); -discinfo_t *irlmp_get_discoveries(int *pn, __u16 mask, int nslots); -void irlmp_do_expiry(void); -void irlmp_do_discovery(int nslots); -discovery_t *irlmp_get_discovery_response(void); -void irlmp_discovery_expiry(discinfo_t *expiry, int number); - -int irlmp_data_request(struct lsap_cb *, struct sk_buff *); -void irlmp_data_indication(struct lsap_cb *, struct sk_buff *); - -int irlmp_udata_request(struct lsap_cb *, struct sk_buff *); -void irlmp_udata_indication(struct lsap_cb *, struct sk_buff *); - -#ifdef CONFIG_IRDA_ULTRA -int irlmp_connless_data_request(struct lsap_cb *, struct sk_buff *, __u8); -void irlmp_connless_data_indication(struct lsap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_status_indication(struct lap_cb *, LINK_STATUS link, LOCK_STATUS lock); -void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow); - -LM_REASON irlmp_convert_lap_reason(LAP_REASON); - -static inline __u32 irlmp_get_saddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->saddr : 0; -} - -static inline __u32 irlmp_get_daddr(const struct lsap_cb *self) -{ - return (self && self->lap) ? self->lap->daddr : 0; -} - -const char *irlmp_reason_str(LM_REASON reason); - -extern int sysctl_discovery_timeout; -extern int sysctl_discovery_slots; -extern int sysctl_discovery; -extern int sysctl_lap_keepalive_time; /* in ms, default is LM_IDLE_TIMEOUT */ -extern struct irlmp_cb *irlmp; - -/* Check if LAP queue is full. - * Used by IrTTP for low control, see comments in irlap.h - Jean II */ -static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) -{ - if (self == NULL) - return 0; - if (self->lap == NULL) - return 0; - if (self->lap->irlap == NULL) - return 0; - - return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; -} - -/* After doing a irlmp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irlmp_connect_response() done on this socket). - * - Jean II */ -static inline void irlmp_listen(struct lsap_cb *self) -{ - self->dlsap_sel = LSAP_ANY; - self->lap = NULL; - self->lsap_state = LSAP_DISCONNECTED; - /* Started when we received the LM_CONNECT_INDICATION */ - del_timer(&self->watchdog_timer); -} - -#endif diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h deleted file mode 100644 index 9e4ec17a7449..000000000000 --- a/include/net/irda/irlmp_event.h +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_event.h - * Version: 0.1 - * Description: IrDA-LMP event handling - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Thu Jul 8 12:18:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRLMP_EVENT_H -#define IRLMP_EVENT_H - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct lsap_cb; -struct lap_cb; -struct discovery_t; - -/* LAP states */ -typedef enum { - /* IrLAP connection control states */ - LAP_STANDBY, /* No LAP connection */ - LAP_U_CONNECT, /* Starting LAP connection */ - LAP_ACTIVE, /* LAP connection is active */ -} IRLMP_STATE; - -/* LSAP connection control states */ -typedef enum { - LSAP_DISCONNECTED, /* No LSAP connection */ - LSAP_CONNECT, /* Connect indication from peer */ - LSAP_CONNECT_PEND, /* Connect request from service user */ - LSAP_DATA_TRANSFER_READY, /* LSAP connection established */ - LSAP_SETUP, /* Trying to set up LSAP connection */ - LSAP_SETUP_PEND, /* Request to start LAP connection */ -} LSAP_STATE; - -typedef enum { - /* LSAP events */ - LM_CONNECT_REQUEST, - LM_CONNECT_CONFIRM, - LM_CONNECT_RESPONSE, - LM_CONNECT_INDICATION, - - LM_DISCONNECT_INDICATION, - LM_DISCONNECT_REQUEST, - - LM_DATA_REQUEST, - LM_UDATA_REQUEST, - LM_DATA_INDICATION, - LM_UDATA_INDICATION, - - LM_WATCHDOG_TIMEOUT, - - /* IrLAP events */ - LM_LAP_CONNECT_REQUEST, - LM_LAP_CONNECT_INDICATION, - LM_LAP_CONNECT_CONFIRM, - LM_LAP_DISCONNECT_INDICATION, - LM_LAP_DISCONNECT_REQUEST, - LM_LAP_DISCOVERY_REQUEST, - LM_LAP_DISCOVERY_CONFIRM, - LM_LAP_IDLE_TIMEOUT, -} IRLMP_EVENT; - -extern const char *const irlmp_state[]; -extern const char *const irlsap_state[]; - -void irlmp_watchdog_timer_expired(void *data); -void irlmp_discovery_timer_expired(void *data); -void irlmp_idle_timer_expired(void *data); - -void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); -int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb); - -#endif /* IRLMP_EVENT_H */ - - - - diff --git a/include/net/irda/irlmp_frame.h b/include/net/irda/irlmp_frame.h deleted file mode 100644 index 1906eb71422e..000000000000 --- a/include/net/irda/irlmp_frame.h +++ /dev/null @@ -1,62 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_frame.h - * Version: 0.9 - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Aug 19 02:09:59 1997 - * Modified at: Fri Dec 10 13:21:53 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1999 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRMLP_FRAME_H -#define IRMLP_FRAME_H - -#include - -#include - -/* IrLMP frame opcodes */ -#define CONNECT_CMD 0x01 -#define CONNECT_CNF 0x81 -#define DISCONNECT 0x02 -#define ACCESSMODE_CMD 0x03 -#define ACCESSMODE_CNF 0x83 - -#define CONTROL_BIT 0x80 - -void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - int expedited, struct sk_buff *skb); -void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - __u8 opcode, struct sk_buff *skb); -void irlmp_link_data_indication(struct lap_cb *, struct sk_buff *, - int unreliable); -#ifdef CONFIG_IRDA_ULTRA -void irlmp_link_unitdata_indication(struct lap_cb *, struct sk_buff *); -#endif /* CONFIG_IRDA_ULTRA */ - -void irlmp_link_connect_indication(struct lap_cb *, __u32 saddr, __u32 daddr, - struct qos_info *qos, struct sk_buff *skb); -void irlmp_link_connect_request(__u32 daddr); -void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, - struct sk_buff *skb); -void irlmp_link_disconnect_indication(struct lap_cb *, struct irlap_cb *, - LAP_REASON reason, struct sk_buff *); -void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log); -void irlmp_link_discovery_indication(struct lap_cb *, discovery_t *discovery); - -#endif diff --git a/include/net/irda/irmod.h b/include/net/irda/irmod.h deleted file mode 100644 index 86f0dbb8ee5d..000000000000 --- a/include/net/irda/irmod.h +++ /dev/null @@ -1,109 +0,0 @@ -/********************************************************************* - * - * Filename: irmod.h - * Version: 0.3 - * Description: IrDA module and utilities functions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Dec 15 13:58:52 1997 - * Modified at: Fri Jan 28 13:15:24 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charg. - * - ********************************************************************/ - -#ifndef IRMOD_H -#define IRMOD_H - -/* Misc status information */ -typedef enum { - STATUS_OK, - STATUS_ABORTED, - STATUS_NO_ACTIVITY, - STATUS_NOISY, - STATUS_REMOTE, -} LINK_STATUS; - -typedef enum { - LOCK_NO_CHANGE, - LOCK_LOCKED, - LOCK_UNLOCKED, -} LOCK_STATUS; - -typedef enum { FLOW_STOP, FLOW_START } LOCAL_FLOW; - -/* - * IrLMP disconnect reasons. The order is very important, since they - * correspond to disconnect reasons sent in IrLMP disconnect frames, so - * please do not touch :-) - */ -typedef enum { - LM_USER_REQUEST = 1, /* User request */ - LM_LAP_DISCONNECT, /* Unexpected IrLAP disconnect */ - LM_CONNECT_FAILURE, /* Failed to establish IrLAP connection */ - LM_LAP_RESET, /* IrLAP reset */ - LM_INIT_DISCONNECT, /* Link Management initiated disconnect */ - LM_LSAP_NOTCONN, /* Data delivered on unconnected LSAP */ - LM_NON_RESP_CLIENT, /* Non responsive LM-MUX client */ - LM_NO_AVAIL_CLIENT, /* No available LM-MUX client */ - LM_CONN_HALF_OPEN, /* Connection is half open */ - LM_BAD_SOURCE_ADDR, /* Illegal source address (i.e 0x00) */ -} LM_REASON; -#define LM_UNKNOWN 0xff /* Unspecified disconnect reason */ - -/* A few forward declarations (to make compiler happy) */ -struct qos_info; /* in */ - -/* - * Notify structure used between transport and link management layers - */ -typedef struct { - int (*data_indication)(void *priv, void *sap, struct sk_buff *skb); - int (*udata_indication)(void *priv, void *sap, struct sk_buff *skb); - void (*connect_confirm)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*connect_indication)(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb); - void (*disconnect_indication)(void *instance, void *sap, - LM_REASON reason, struct sk_buff *); - void (*flow_indication)(void *instance, void *sap, LOCAL_FLOW flow); - void (*status_indication)(void *instance, - LINK_STATUS link, LOCK_STATUS lock); - void *instance; /* Layer instance pointer */ - char name[16]; /* Name of layer */ -} notify_t; - -#define NOTIFY_MAX_NAME 16 - -/* Zero the notify structure */ -void irda_notify_init(notify_t *notify); - -/* Locking wrapper - Note the inverted logic on irda_lock(). - * Those function basically return false if the lock is already in the - * position you want to set it. - Jean II */ -#define irda_lock(lock) (! test_and_set_bit(0, (void *) (lock))) -#define irda_unlock(lock) (test_and_clear_bit(0, (void *) (lock))) - -#endif /* IRMOD_H */ - - - - - - - - - diff --git a/include/net/irda/irqueue.h b/include/net/irda/irqueue.h deleted file mode 100644 index 37f512bd6733..000000000000 --- a/include/net/irda/irqueue.h +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************* - * - * Filename: irqueue.h - * Version: 0.3 - * Description: General queue implementation - * Status: Experimental. - * Author: Dag Brattli - * Created at: Tue Jun 9 13:26:50 1998 - * Modified at: Thu Oct 7 13:25:16 1999 - * Modified by: Dag Brattli - * - * Copyright (C) 1998-1999, Aage Kvalnes - * Copyright (c) 1998, Dag Brattli - * All Rights Reserved. - * - * This code is taken from the Vortex Operating System written by Aage - * Kvalnes and has been ported to Linux and Linux/IR by Dag Brattli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include -#include - -#ifndef IRDA_QUEUE_H -#define IRDA_QUEUE_H - -#define NAME_SIZE 32 - -/* - * Hash types (some flags can be xored) - * See comments in irqueue.c for which one to use... - */ -#define HB_NOLOCK 0 /* No concurent access prevention */ -#define HB_LOCK 1 /* Prevent concurent write with global lock */ - -/* - * Hash defines - */ -#define HASHBIN_SIZE 8 -#define HASHBIN_MASK 0x7 - -#ifndef IRDA_ALIGN -#define IRDA_ALIGN __attribute__((aligned)) -#endif - -#define Q_NULL { NULL, NULL, "", 0 } - -typedef void (*FREE_FUNC)(void *arg); - -struct irda_queue { - struct irda_queue *q_next; - struct irda_queue *q_prev; - - char q_name[NAME_SIZE]; - long q_hash; /* Must be able to cast a (void *) */ -}; -typedef struct irda_queue irda_queue_t; - -typedef struct hashbin_t { - __u32 magic; - int hb_type; - int hb_size; - spinlock_t hb_spinlock; /* HB_LOCK - Can be used by the user */ - - irda_queue_t* hb_queue[HASHBIN_SIZE] IRDA_ALIGN; - - irda_queue_t* hb_current; -} hashbin_t; - -hashbin_t *hashbin_new(int type); -int hashbin_delete(hashbin_t* hashbin, FREE_FUNC func); -int hashbin_clear(hashbin_t* hashbin, FREE_FUNC free_func); -void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, - const char* name); -void* hashbin_remove(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_remove_first(hashbin_t *hashbin); -void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry); -void* hashbin_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_lock_find(hashbin_t* hashbin, long hashv, const char* name); -void* hashbin_find_next(hashbin_t* hashbin, long hashv, const char* name, - void ** pnext); -irda_queue_t *hashbin_get_first(hashbin_t *hashbin); -irda_queue_t *hashbin_get_next(hashbin_t *hashbin); - -#define HASHBIN_GET_SIZE(hashbin) hashbin->hb_size - -#endif diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h deleted file mode 100644 index 98682d4bae8f..000000000000 --- a/include/net/irda/irttp.h +++ /dev/null @@ -1,210 +0,0 @@ -/********************************************************************* - * - * Filename: irttp.h - * Version: 1.0 - * Description: Tiny Transport Protocol (TTP) definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 31 20:14:31 1997 - * Modified at: Sun Dec 12 13:09:07 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef IRTTP_H -#define IRTTP_H - -#include -#include -#include - -#include -#include /* struct lsap_cb */ -#include /* struct qos_info */ -#include - -#define TTP_MAX_CONNECTIONS LM_MAX_CONNECTIONS -#define TTP_HEADER 1 -#define TTP_MAX_HEADER (TTP_HEADER + LMP_MAX_HEADER) -#define TTP_SAR_HEADER 5 -#define TTP_PARAMETERS 0x80 -#define TTP_MORE 0x80 - -/* Transmission queue sizes */ -/* Worst case scenario, two window of data - Jean II */ -#define TTP_TX_MAX_QUEUE 14 -/* We need to keep at least 5 frames to make sure that we can refill - * appropriately the LAP layer. LAP keeps only two buffers, and we need - * to have 7 to make a full window - Jean II */ -#define TTP_TX_LOW_THRESHOLD 5 -/* Most clients are synchronous with respect to flow control, so we can - * keep a low number of Tx buffers in TTP - Jean II */ -#define TTP_TX_HIGH_THRESHOLD 7 - -/* Receive queue sizes */ -/* Minimum of credit that the peer should hold. - * If the peer has less credits than 9 frames, we will explicitly send - * him some credits (through irttp_give_credit() and a specific frame). - * Note that when we give credits it's likely that it won't be sent in - * this LAP window, but in the next one. So, we make sure that the peer - * has something to send while waiting for credits (one LAP window == 7 - * + 1 frames while he process the credits). - Jean II */ -#define TTP_RX_MIN_CREDIT 8 -/* This is the default maximum number of credits held by the peer, so the - * default maximum number of frames he can send us before needing flow - * control answer from us (this may be negociated differently at TSAP setup). - * We want to minimise the number of times we have to explicitly send some - * credit to the peer, hoping we can piggyback it on the return data. In - * particular, it doesn't make sense for us to send credit more than once - * per LAP window. - * Moreover, giving credits has some latency, so we need strictly more than - * a LAP window, otherwise we may already have credits in our Tx queue. - * But on the other hand, we don't want to keep too many Rx buffer here - * before starting to flow control the other end, so make it exactly one - * LAP window + 1 + MIN_CREDITS. - Jean II */ -#define TTP_RX_DEFAULT_CREDIT 16 -/* Maximum number of credits we can allow the peer to have, and therefore - * maximum Rx queue size. - * Note that we try to deliver packets to the higher layer every time we - * receive something, so in normal mode the Rx queue will never contains - * more than one or two packets. - Jean II */ -#define TTP_RX_MAX_CREDIT 21 - -/* What clients should use when calling ttp_open_tsap() */ -#define DEFAULT_INITIAL_CREDIT TTP_RX_DEFAULT_CREDIT - -/* Some priorities for disconnect requests */ -#define P_NORMAL 0 -#define P_HIGH 1 - -#define TTP_SAR_DISABLE 0 -#define TTP_SAR_UNBOUND 0xffffffff - -/* Parameters */ -#define TTP_MAX_SDU_SIZE 0x01 - -/* - * This structure contains all data associated with one instance of a TTP - * connection. - */ -struct tsap_cb { - irda_queue_t q; /* Must be first */ - magic_t magic; /* Just in case */ - - __u8 stsap_sel; /* Source TSAP */ - __u8 dtsap_sel; /* Destination TSAP */ - - struct lsap_cb *lsap; /* Corresponding LSAP to this TSAP */ - - __u8 connected; /* TSAP connected */ - - __u8 initial_credit; /* Initial credit to give peer */ - - int avail_credit; /* Available credit to return to peer */ - int remote_credit; /* Credit held by peer TTP entity */ - int send_credit; /* Credit held by local TTP entity */ - - struct sk_buff_head tx_queue; /* Frames to be transmitted */ - struct sk_buff_head rx_queue; /* Received frames */ - struct sk_buff_head rx_fragments; - int tx_queue_lock; - int rx_queue_lock; - spinlock_t lock; - - notify_t notify; /* Callbacks to client layer */ - - struct net_device_stats stats; - struct timer_list todo_timer; - - __u32 max_seg_size; /* Max data that fit into an IrLAP frame */ - __u8 max_header_size; - - int rx_sdu_busy; /* RxSdu.busy */ - __u32 rx_sdu_size; /* Current size of a partially received frame */ - __u32 rx_max_sdu_size; /* Max receive user data size */ - - int tx_sdu_busy; /* TxSdu.busy */ - __u32 tx_max_sdu_size; /* Max transmit user data size */ - - int close_pend; /* Close, but disconnect_pend */ - unsigned long disconnect_pend; /* Disconnect, but still data to send */ - struct sk_buff *disconnect_skb; -}; - -struct irttp_cb { - magic_t magic; - hashbin_t *tsaps; -}; - -int irttp_init(void); -void irttp_cleanup(void); - -struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify); -int irttp_close_tsap(struct tsap_cb *self); - -int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb); -int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb); - -int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *qos, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, - struct sk_buff *userdata); -int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *skb, - int priority); -void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow); -struct tsap_cb *irttp_dup(struct tsap_cb *self, void *instance); - -static inline __u32 irttp_get_saddr(struct tsap_cb *self) -{ - return irlmp_get_saddr(self->lsap); -} - -static inline __u32 irttp_get_daddr(struct tsap_cb *self) -{ - return irlmp_get_daddr(self->lsap); -} - -static inline __u32 irttp_get_max_seg_size(struct tsap_cb *self) -{ - return self->max_seg_size; -} - -/* After doing a irttp_dup(), this get one of the two socket back into - * a state where it's waiting incoming connections. - * Note : this can be used *only* if the socket is not yet connected - * (i.e. NO irttp_connect_response() done on this socket). - * - Jean II */ -static inline void irttp_listen(struct tsap_cb *self) -{ - irlmp_listen(self->lsap); - self->dtsap_sel = LSAP_ANY; -} - -/* Return TRUE if the node is in primary mode (i.e. master) - * - Jean II */ -static inline int irttp_is_primary(struct tsap_cb *self) -{ - if ((self == NULL) || - (self->lsap == NULL) || - (self->lsap->lap == NULL) || - (self->lsap->lap->irlap == NULL)) - return -2; - return irlap_is_primary(self->lsap->lap->irlap); -} - -#endif /* IRTTP_H */ diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h deleted file mode 100644 index 2d9cd0007cba..000000000000 --- a/include/net/irda/parameters.h +++ /dev/null @@ -1,100 +0,0 @@ -/********************************************************************* - * - * Filename: parameters.h - * Version: 1.0 - * Description: A more general way to handle (pi,pl,pv) parameters - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Jun 7 08:47:28 1999 - * Modified at: Sun Jan 30 14:05:14 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - * Michel Dänzer , 10/2001 - * - simplify irda_pv_t to avoid endianness issues - * - ********************************************************************/ - -#ifndef IRDA_PARAMS_H -#define IRDA_PARAMS_H - -/* - * The currently supported types. Beware not to change the sequence since - * it a good reason why the sized integers has a value equal to their size - */ -typedef enum { - PV_INTEGER, /* Integer of any (pl) length */ - PV_INT_8_BITS, /* Integer of 8 bits in length */ - PV_INT_16_BITS, /* Integer of 16 bits in length */ - PV_STRING, /* \0 terminated string */ - PV_INT_32_BITS, /* Integer of 32 bits in length */ - PV_OCT_SEQ, /* Octet sequence */ - PV_NO_VALUE /* Does not contain any value (pl=0) */ -} PV_TYPE; - -/* Bit 7 of type field */ -#define PV_BIG_ENDIAN 0x80 -#define PV_LITTLE_ENDIAN 0x00 -#define PV_MASK 0x7f /* To mask away endian bit */ - -#define PV_PUT 0 -#define PV_GET 1 - -typedef union { - char *c; - __u32 i; - __u32 *ip; -} irda_pv_t; - -typedef struct { - __u8 pi; - __u8 pl; - irda_pv_t pv; -} irda_param_t; - -typedef int (*PI_HANDLER)(void *self, irda_param_t *param, int get); -typedef int (*PV_HANDLER)(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); - -typedef struct { - const PI_HANDLER func; /* Handler for this parameter identifier */ - PV_TYPE type; /* Data type for this parameter */ -} pi_minor_info_t; - -typedef struct { - const pi_minor_info_t *pi_minor_call_table; - int len; -} pi_major_info_t; - -typedef struct { - const pi_major_info_t *tables; - int len; - __u8 pi_mask; - int pi_major_offset; -} pi_param_info_t; - -int irda_param_pack(__u8 *buf, char *fmt, ...); - -int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, - pi_param_info_t *info); -int irda_param_extract_all(void *self, __u8 *buf, int len, - pi_param_info_t *info); - -#define irda_param_insert_byte(buf,pi,pv) irda_param_pack(buf,"bbb",pi,1,pv) - -#endif /* IRDA_PARAMS_H */ - diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h deleted file mode 100644 index 05a5a249956f..000000000000 --- a/include/net/irda/qos.h +++ /dev/null @@ -1,101 +0,0 @@ -/********************************************************************* - * - * Filename: qos.h - * Version: 1.0 - * Description: Quality of Service definitions - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Sep 19 23:21:09 1997 - * Modified at: Thu Dec 2 13:51:54 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * - ********************************************************************/ - -#ifndef IRDA_QOS_H -#define IRDA_QOS_H - -#include - -#include - -#define PI_BAUD_RATE 0x01 -#define PI_MAX_TURN_TIME 0x82 -#define PI_DATA_SIZE 0x83 -#define PI_WINDOW_SIZE 0x84 -#define PI_ADD_BOFS 0x85 -#define PI_MIN_TURN_TIME 0x86 -#define PI_LINK_DISC 0x08 - -#define IR_115200_MAX 0x3f - -/* Baud rates (first byte) */ -#define IR_2400 0x01 -#define IR_9600 0x02 -#define IR_19200 0x04 -#define IR_38400 0x08 -#define IR_57600 0x10 -#define IR_115200 0x20 -#define IR_576000 0x40 -#define IR_1152000 0x80 - -/* Baud rates (second byte) */ -#define IR_4000000 0x01 -#define IR_16000000 0x02 - -/* Quality of Service information */ -typedef struct { - __u32 value; - __u16 bits; /* LSB is first byte, MSB is second byte */ -} qos_value_t; - -struct qos_info { - magic_t magic; - - qos_value_t baud_rate; /* IR_11520O | ... */ - qos_value_t max_turn_time; - qos_value_t data_size; - qos_value_t window_size; - qos_value_t additional_bofs; - qos_value_t min_turn_time; - qos_value_t link_disc_time; - - qos_value_t power; -}; - -extern int sysctl_max_baud_rate; -extern int sysctl_max_inactive_time; - -void irda_init_max_qos_capabilies(struct qos_info *qos); -void irda_qos_compute_intersection(struct qos_info *, struct qos_info *); - -__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time); - -void irda_qos_bits_to_value(struct qos_info *qos); - -/* So simple, how could we not inline those two ? - * Note : one byte is 10 bits if you include start and stop bits - * Jean II */ -#define irlap_min_turn_time_in_bytes(speed, min_turn_time) ( \ - speed * min_turn_time / 10000000 \ -) -#define irlap_xbofs_in_usec(speed, xbofs) ( \ - xbofs * 10000000 / speed \ -) - -#endif - diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h deleted file mode 100644 index d784f242cf7b..000000000000 --- a/include/net/irda/timer.h +++ /dev/null @@ -1,105 +0,0 @@ -/********************************************************************* - * - * Filename: timer.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Thu Oct 7 12:25:24 1999 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1998-1999 Dag Brattli , - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef TIMER_H -#define TIMER_H - -#include -#include - -#include /* for HZ */ - -#include - -/* A few forward declarations (to make compiler happy) */ -struct irlmp_cb; -struct irlap_cb; -struct lsap_cb; -struct lap_cb; - -/* - * Timeout definitions, some defined in IrLAP 6.13.5 - p. 92 - */ -#define POLL_TIMEOUT (450*HZ/1000) /* Must never exceed 500 ms */ -#define FINAL_TIMEOUT (500*HZ/1000) /* Must never exceed 500 ms */ - -/* - * Normally twice of p-timer. Note 3, IrLAP 6.3.11.2 - p. 60 suggests - * at least twice duration of the P-timer. - */ -#define WD_TIMEOUT (POLL_TIMEOUT*2) - -#define MEDIABUSY_TIMEOUT (500*HZ/1000) /* 500 msec */ -#define SMALLBUSY_TIMEOUT (100*HZ/1000) /* 100 msec - IrLAP 6.13.4 */ - -/* - * Slot timer must never exceed 85 ms, and must always be at least 25 ms, - * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of - * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overrides it - Jean II) - */ -#define SLOT_TIMEOUT (90*HZ/1000) - -/* - * The latest discovery frame (XID) is longer due to the extra discovery - * information (hints, device name...). This is its extra length. - * We use that when setting the query timeout. Jean II - */ -#define XIDEXTRA_TIMEOUT (34*HZ/1000) /* 34 msec */ - -#define WATCHDOG_TIMEOUT (20*HZ) /* 20 sec */ - -typedef void (*TIMER_CALLBACK)(void *); - -static inline void irda_start_timer(struct timer_list *ptimer, int timeout, - void* data, TIMER_CALLBACK callback) -{ - ptimer->function = (void (*)(unsigned long)) callback; - ptimer->data = (unsigned long) data; - - /* Set new value for timer (update or add timer). - * We use mod_timer() because it's more efficient and also - * safer with respect to race conditions - Jean II */ - mod_timer(ptimer, jiffies + timeout); -} - - -void irlap_start_slot_timer(struct irlap_cb *self, int timeout); -void irlap_start_query_timer(struct irlap_cb *self, int S, int s); -void irlap_start_final_timer(struct irlap_cb *self, int timeout); -void irlap_start_wd_timer(struct irlap_cb *self, int timeout); -void irlap_start_backoff_timer(struct irlap_cb *self, int timeout); - -void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout); -void irlap_stop_mbusy_timer(struct irlap_cb *); - -void irlmp_start_watchdog_timer(struct lsap_cb *, int timeout); -void irlmp_start_discovery_timer(struct irlmp_cb *, int timeout); -void irlmp_start_idle_timer(struct lap_cb *, int timeout); -void irlmp_stop_idle_timer(struct lap_cb *self); - -#endif - diff --git a/include/net/irda/wrapper.h b/include/net/irda/wrapper.h deleted file mode 100644 index eef53ebe3d76..000000000000 --- a/include/net/irda/wrapper.h +++ /dev/null @@ -1,58 +0,0 @@ -/********************************************************************* - * - * Filename: wrapper.h - * Version: 1.2 - * Description: IrDA SIR async wrapper layer - * Status: Experimental. - * Author: Dag Brattli - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Tue Jan 11 12:37:29 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli , - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef WRAPPER_H -#define WRAPPER_H - -#include -#include -#include - -#include /* iobuff_t */ - -#define BOF 0xc0 /* Beginning of frame */ -#define XBOF 0xff -#define EOF 0xc1 /* End of frame */ -#define CE 0x7d /* Control escape */ - -#define STA BOF /* Start flag */ -#define STO EOF /* End flag */ - -#define IRDA_TRANS 0x20 /* Asynchronous transparency modifier */ - -/* States for receiving a frame in async mode */ -enum { - OUTSIDE_FRAME, - BEGIN_FRAME, - LINK_ESCAPE, - INSIDE_FRAME -}; - -/* Proto definitions */ -int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize); -void async_unwrap_char(struct net_device *dev, struct net_device_stats *stats, - iobuff_t *buf, __u8 byte); - -#endif -- cgit From 21acf63013ed3d6fce3176cc34b74064052a31b4 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Mon, 28 Aug 2017 16:18:42 +1000 Subject: net/ncsi: Configure VLAN tag filter Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI stack process new VLAN tags and configure the channel VLAN filter appropriately. Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent for each one, meaning the ncsi_dev_state_config_svf state must be repeated. An internal list of VLAN tags is maintained, and compared against the current channel's ncsi_channel_filter in order to keep track within the state. VLAN filters are removed in a similar manner, with the introduction of the ncsi_dev_state_config_clear_vids state. The maximum number of VLAN tag filters is determined by the "Get Capabilities" response from the channel. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- include/net/ncsi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 68680baac0fd..1f96af46df49 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -28,6 +28,8 @@ struct ncsi_dev { }; #ifdef CONFIG_NET_NCSI +int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid); +int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); -- cgit From aa4035d2c7683d2f2fb0ffe8087abd9eabf6d54a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:27 -0700 Subject: drm/syncobj: Add a reset ioctl (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just resets the dma_fence to NULL so it looks like it's never been signaled. This will be useful once we add the new wait API for allowing wait on "submit and signal" behavior. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Reviewed-by: Christian König (v1) Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 4c746597225e..b037fdf9e43b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -731,6 +731,12 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -854,6 +860,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit From ffa9443fb3d3eddf0fdf6ac473dc8b5c87f08f15 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Aug 2017 14:10:28 -0700 Subject: drm/syncobj: Add a signal ioctl (v3) This IOCTL provides a mechanism for userspace to trigger a sync object directly. There are other ways that userspace can trigger a syncobj such as submitting a dummy batch somewhere or hanging on to a triggered sync_file and doing an import. This just provides an easy way to manually trigger the sync object without weird hacks. The motivation for this IOCTL is Vulkan fences. Vulkan lets you create a fence already in the signaled state so that you can wait on it immediatly without stalling. We could also handle this with a new create flag to ask the driver to create a syncobj that is already signaled but the IOCTL seemed a bit cleaner and more generic. v2: - Take an array of sync objects (Dave Airlie) v3: - Throw -EINVAL if pad != 0 Signed-off-by: Jason Ekstrand Signed-off-by: Dave Airlie --- include/uapi/drm/drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index b037fdf9e43b..97677cd6964d 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -861,6 +861,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers -- cgit From 2fd523c57e520899e05ec663d04743005bcef5b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 17:06:13 +0200 Subject: dma-mapping: remove dma_alloc_noncoherent and dma_free_noncoherent No users left, everyone switched to the _attrs versions. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 66d8ea68f40b..4c98cc96971f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -549,20 +549,6 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0); } -static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - return dma_alloc_attrs(dev, size, dma_handle, gfp, - DMA_ATTR_NON_CONSISTENT); -} - -static inline void dma_free_noncoherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - dma_free_attrs(dev, size, cpu_addr, dma_handle, - DMA_ATTR_NON_CONSISTENT); -} - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { const struct dma_map_ops *ops = get_dma_ops(dev); -- cgit From 10674a03c633379fadb5b314abde975fba270058 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 29 Aug 2017 10:15:40 +0100 Subject: net: rxrpc: Replace time_t type with time64_t type Since the 'expiry' variable of 'struct key_preparsed_payload' has been changed to 'time64_t' type, which is year 2038 safe on 32bits system. In net/rxrpc subsystem, we need convert 'u32' type to 'time64_t' type when copying ticket expires time to 'prep->expiry', then this patch introduces two helper functions to help convert 'u32' to 'time64_t' type. This patch also uses ktime_get_real_seconds() to get current time instead of get_seconds() which is not year 2038 safe on 32bits system. Signed-off-by: Baolin Wang Signed-off-by: David Howells --- include/keys/rxrpc-type.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index 5de0673f333b..8cf829dbf20e 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -127,4 +127,27 @@ struct rxrpc_key_data_v1 { #define AFSTOKEN_K5_ADDRESSES_MAX 16 /* max K5 addresses */ #define AFSTOKEN_K5_AUTHDATA_MAX 16 /* max K5 pieces of auth data */ +/* + * Truncate a time64_t to the range from 1970 to 2106 as in the network + * protocol. + */ +static inline u32 rxrpc_time64_to_u32(time64_t time) +{ + if (time < 0) + return 0; + + if (time > UINT_MAX) + return UINT_MAX; + + return (u32)time; +} + +/* + * Extend u32 back to time64_t using the same 1970-2106 range. + */ +static inline time64_t rxrpc_u32_to_time64(u32 time) +{ + return (time64_t)time; +} + #endif /* _KEYS_RXRPC_TYPE_H */ -- cgit From e833251ad813168253fef9915aaf6a8c883337b0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:18:56 +0100 Subject: rxrpc: Add notification of end-of-Tx phase Add a callback to rxrpc_kernel_send_data() so that a kernel service can get a notification that the AF_RXRPC call has transitioned out the Tx phase and is now waiting for a reply or a final ACK. This is called from AF_RXRPC with the call state lock held so the notification is guaranteed to come before any reply is passed back. Further, modify the AFS filesystem to make use of this so that we don't have to change the afs_call state before sending the last bit of data. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c172709787af..07a47ee6f783 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,6 +21,8 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, + unsigned long); typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); @@ -37,7 +39,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, gfp_t, rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, - struct msghdr *, size_t); + struct msghdr *, size_t, + rxrpc_notify_end_tx_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, -- cgit From c038a58ccfd6704d4d7d60ed3d6a0fca13cf13a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2017 10:19:01 +0100 Subject: rxrpc: Allow failed client calls to be retried Allow a client call that failed on network error to be retried, provided that the Tx queue still holds DATA packet 1. This allows an operation to be submitted to another server or another address for the same server without having to repackage and re-encrypt the data so far processed. Two new functions are provided: (1) rxrpc_kernel_check_call() - This is used to find out the completion state of a call to guess whether it can be retried and whether it should be retried. (2) rxrpc_kernel_retry_call() - Disconnect the call from its current connection, reset the state and submit it as a new client call to a new address. The new address need not match the previous address. A call may be retried even if all the data hasn't been loaded into it yet; a partially constructed will be retained at the same point it was at when an error condition was detected. msg_data_left() can be used to find out how much data was packaged before the error occurred. Signed-off-by: David Howells --- include/net/af_rxrpc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 07a47ee6f783..3ac79150291f 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -19,6 +19,18 @@ struct sock; struct socket; struct rxrpc_call; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, @@ -51,5 +63,9 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); +int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *, struct key *); +int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, + enum rxrpc_call_completion *, u32 *); #endif /* _NET_RXRPC_H */ -- cgit From c3142dd8bedb8b78ee13c885dd92093fc8a50277 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 15 Aug 2017 22:04:56 +0200 Subject: power: supply: Add power_supply_set_input_current_limit_from_supplier helper On some devices the USB Type-C port power (USB PD 2.0) negotiation is done by a separate port-controller IC, while the current limit is controlled through another (charger) IC. It has been decided to model this by modelling the external Type-C power brick (adapter/charger) as a power-supply class device which supplies the charger-IC, with its voltage-now and current-max representing the negotiated voltage and max current draw. This commit adds a power_supply_set_input_current_limit_from_supplier helper function which charger power-supply drivers can call to get the max-current from their supplier and have this applied through their set_property call-back to their input-current-limit. Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index de89066b72b1..79e90b3d3288 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -332,6 +332,8 @@ extern int power_supply_get_battery_info(struct power_supply *psy, struct power_supply_battery_info *info); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_input_current_limit_from_supplier( + struct power_supply *psy); extern int power_supply_set_battery_charged(struct power_supply *psy); #ifdef CONFIG_POWER_SUPPLY -- cgit From 3a731c6414c94012328f485b4b1bb88ed841f9eb Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 23 Aug 2017 20:36:14 -0700 Subject: power: supply: bq27xxx: Add chip IDs for previously shadowed chips For the existing features, these chips act like others already ID'd, so they had false but functional IDs. We will be adding features which require correct IDs, so the following IDs are added: BQ2752X, 531, 542, 546, 742, 425, 441, 621 Chip-specific features are now tracked by BQ27XXX_O_* flags in di->opts. No functional changes to the driver. Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 89890437f9ab..e48e7a4e9cd6 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -6,6 +6,7 @@ enum bq27xxx_chip { BQ27010, /* bq27010, bq27210 */ BQ2750X, /* bq27500 deprecated alias */ BQ2751X, /* bq27510, bq27520 deprecated alias */ + BQ2752X, BQ27500, /* bq27500/1 */ BQ27510G1, /* bq27510G1 */ BQ27510G2, /* bq27510G2 */ @@ -15,9 +16,16 @@ enum bq27xxx_chip { BQ27520G3, /* bq27520G3 */ BQ27520G4, /* bq27520G4 */ BQ27530, /* bq27530, bq27531 */ + BQ27531, BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ + BQ27542, + BQ27546, + BQ27742, BQ27545, /* bq27545 */ BQ27421, /* bq27421, bq27425, bq27441, bq27621 */ + BQ27425, + BQ27441, + BQ27621, }; struct bq27xxx_device_info; @@ -47,6 +55,7 @@ struct bq27xxx_device_info { int id; enum bq27xxx_chip chip; bool ram_chip; + u32 opts; const char *name; struct bq27xxx_dm_reg *dm_regs; u32 unseal_key; -- cgit From 05045379b2740686020618c8cfd4b517cff9f918 Mon Sep 17 00:00:00 2001 From: Liam Breck Date: Wed, 23 Aug 2017 20:36:15 -0700 Subject: power: supply: bq27xxx: Enable data memory update for certain chips Support data memory update on BQ27425. Parameters from TI datasheets are also provided for BQ27500, 545, 421, 441, 621; however these are commented out, as they are not tested. Add BQ27XXX_O_CFGUP & _O_RAM for use in bq27xxx_chip_data[n].opts and by data memory update functions. Signed-off-by: Liam Breck Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e48e7a4e9cd6..43194e02c1ee 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -54,7 +54,6 @@ struct bq27xxx_device_info { struct device *dev; int id; enum bq27xxx_chip chip; - bool ram_chip; u32 opts; const char *name; struct bq27xxx_dm_reg *dm_regs; -- cgit From 75e8387685f6c65feb195a4556110b58f852b848 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Fri, 25 Aug 2017 21:49:37 +0800 Subject: perf/ftrace: Fix double traces of perf on ftrace:function When running perf on the ftrace:function tracepoint, there is a bug which can be reproduced by: perf record -e ftrace:function -a sleep 20 & perf record -e ftrace:function ls perf script ls 10304 [005] 171.853235: ftrace:function: perf_output_begin ls 10304 [005] 171.853237: ftrace:function: perf_output_begin ls 10304 [005] 171.853239: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853240: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853242: ftrace:function: __task_pid_nr_ns ls 10304 [005] 171.853244: ftrace:function: __task_pid_nr_ns We can see that all the function traces are doubled. The problem is caused by the inconsistency of the register function perf_ftrace_event_register() with the probe function perf_ftrace_function_call(). The former registers one probe for every perf_event. And the latter handles all perf_events on the current cpu. So when two perf_events on the current cpu, the traces of them will be doubled. So this patch adds an extra parameter "event" for perf_tp_event, only send sample data to this event when it's not NULL. Signed-off-by: Zhou Chengming Reviewed-by: Jiri Olsa Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: huawei.libin@huawei.com Link: http://lkml.kernel.org/r/1503668977-12526-1-git-send-email-zhouchengming1@huawei.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b14095bcf4bb..c00cd4b02f32 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1201,7 +1201,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task); + struct task_struct *task, struct perf_event *event); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 536c80ff7ad9..5012b524283d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -508,9 +508,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task) + struct task_struct *task, struct perf_event *event) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } #endif -- cgit From 6e44636aeab19259f804c8abca57a95ddc01df66 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Tue, 15 Aug 2017 11:59:02 +0300 Subject: net/mlx5: Update HW layout definitions * add offload_type field to mlx5_ifc_qpc_bits * update mlx5_ifc_xrqc_bits layout Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4eff0b8a1482..e27283ab3667 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2022,6 +2022,10 @@ enum { MLX5_QPC_PM_STATE_MIGRATED = 0x3, }; +enum { + MLX5_QPC_OFFLOAD_TYPE_RNDV = 0x1, +}; + enum { MLX5_QPC_END_PADDING_MODE_SCATTER_AS_IS = 0x0, MLX5_QPC_END_PADDING_MODE_PAD_TO_CACHE_LINE_ALIGNMENT = 0x1, @@ -2065,7 +2069,8 @@ struct mlx5_ifc_qpc_bits { u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; - u8 reserved_at_15[0x7]; + u8 reserved_at_15[0x3]; + u8 offload_type[0x4]; u8 end_padding_mode[0x2]; u8 reserved_at_1e[0x2]; @@ -3010,7 +3015,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x880]; + u8 reserved_at_180[0x280]; struct mlx5_ifc_wq_bits wq; }; -- cgit From 6938fc1ee07e54c057430005f8dcaccabce027c3 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:03 +0300 Subject: IB/core: Add XRQ capabilities This patch adds following TM XRQ capabilities: * max_rndv_hdr_size - Max size of rendezvous request message * max_num_tags - Max number of entries in tag matching list * max_ops - Max number of outstanding list operations * max_sge - Max number of SGE in tag matching entry * flags - the following flags are currently defined: - IB_TM_CAP_RC - Support tag matching on RC transport Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 355c7a328e0b..cab0bdcfad51 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -280,6 +280,24 @@ struct ib_rss_caps { u32 max_rwq_indirection_table_size; }; +enum ib_tm_cap_flags { + /* Support tag matching on RC transport */ + IB_TM_CAP_RC = 1 << 0, +}; + +struct ib_xrq_caps { + /* Max size of RNDV header */ + u32 max_rndv_hdr_size; + /* Max number of entries in tag matching list */ + u32 max_num_tags; + /* From enum ib_tm_cap_flags */ + u32 flags; + /* Max number of outstanding list operations */ + u32 max_ops; + /* Max number of SGE in tag matching entry */ + u32 max_sge; +}; + enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, @@ -340,6 +358,7 @@ struct ib_device_attr { struct ib_rss_caps rss_caps; u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ + struct ib_xrq_caps xrq_caps; }; enum ib_mtu { -- cgit From 1a56ff6daab1e062aadec582eb10e7090f0b370a Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:04 +0300 Subject: IB/core: Separate CQ handle in SRQ context Before this change CQ attached to SRQ was part of XRC specific extension. Moving CQ handle out makes it available to other types extending SRQ functionality. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cab0bdcfad51..f0e46757185b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -994,6 +994,11 @@ enum ib_srq_type { IB_SRQT_XRC }; +static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) +{ + return srq_type == IB_SRQT_XRC; +} + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -1011,11 +1016,13 @@ struct ib_srq_init_attr { struct ib_srq_attr attr; enum ib_srq_type srq_type; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + } xrc; + }; } ext; }; @@ -1554,12 +1561,14 @@ struct ib_srq { enum ib_srq_type srq_type; atomic_t usecnt; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - u32 srq_num; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + u32 srq_num; + } xrc; + }; } ext; }; -- cgit From 9c2c849625cf779e0fac41c8be3c163df4b80c14 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:05 +0300 Subject: IB/core: Add new SRQ type IB_SRQT_TM This patch adds new SRQ type - IB_SRQT_TM. The new SRQ type supports tag matching and rendezvous offloads for MPI applications. When SRQ receives a message it will search through the matching list for the corresponding posted receive buffer. The process of searching the matching list is called tag matching. In case the tag matching results in a match, the received message will be placed in the address specified by the receive buffer. In case no match was found the message will be placed in a generic buffer until the corresponding receive buffer will be posted. These messages are called unexpected and their set is called an unexpected list. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f0e46757185b..1b4bb8743969 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -991,12 +991,14 @@ enum ib_cq_notify_flags { enum ib_srq_type { IB_SRQT_BASIC, - IB_SRQT_XRC + IB_SRQT_XRC, + IB_SRQT_TM, }; static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) { - return srq_type == IB_SRQT_XRC; + return srq_type == IB_SRQT_XRC || + srq_type == IB_SRQT_TM; } enum ib_srq_attr_mask { @@ -1022,6 +1024,10 @@ struct ib_srq_init_attr { struct { struct ib_xrcd *xrcd; } xrc; + + struct { + u32 max_num_tags; + } tag_matching; }; } ext; }; -- cgit From 9382d4e1d3c09fe20fa53eb12b51ef01ad40774f Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:06 +0300 Subject: IB/uverbs: Add XRQ creation parameter to UAPI Add tm_list_size parameter to struct ib_uverbs_create_xsrq. If SRQ type is tag-matching this field defines maximum size of tag matching list. Otherwise, it is expected to be zero. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 63656d2e8705..d5434bbf40c8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1024,7 +1024,7 @@ struct ib_uverbs_create_xsrq { __u32 max_wr; __u32 max_sge; __u32 srq_limit; - __u32 reserved; + __u32 max_num_tags; __u32 xrcd_handle; __u32 cq_handle; __u64 driver_data[0]; -- cgit From 8d50505ada728258fcdce99120b937ce68298c4e Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:08 +0300 Subject: IB/uverbs: Expose XRQ capabilities Make XRQ capabilities available via ibv_query_device() verb. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_verbs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d5434bbf40c8..9a0b6479fe0c 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -236,6 +236,20 @@ struct ib_uverbs_rss_caps { __u32 reserved; }; +struct ib_uverbs_tm_caps { + /* Max size of rendezvous request message */ + __u32 max_rndv_hdr_size; + /* Max number of entries in tag matching list */ + __u32 max_num_tags; + /* TM flags */ + __u32 flags; + /* Max number of outstanding list operations */ + __u32 max_ops; + /* Max number of SGE in tag matching entry */ + __u32 max_sge; + __u32 reserved; +}; + struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; @@ -247,6 +261,7 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; + struct ib_uverbs_tm_caps xrq_caps; }; struct ib_uverbs_query_port { -- cgit From 5b3ec3fcb6bbe081279c73fb574af8c72f14cea0 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:10 +0300 Subject: net/mlx5: Add XRQ support Add support to new XRQ(eXtended shared Receive Queue) hardware object. It supports SRQ semantics with addition of extended receive buffers topologies and offloads. Currently supports tag matching topology and rendezvouz offload. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/srq.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 99d88624ad07..c33e6f7a1afb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -418,6 +418,7 @@ enum mlx5_res_type { MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, + MLX5_RES_XRQ = 5, }; struct mlx5_core_rsc_common { diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 1cde0fd53f90..24ff23e27c8a 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -38,6 +38,7 @@ enum { MLX5_SRQ_FLAG_ERR = (1 << 0), MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), }; struct mlx5_srq_attr { @@ -56,6 +57,10 @@ struct mlx5_srq_attr { u32 user_index; u64 db_record; __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; }; struct mlx5_core_dev; -- cgit From 8d4e6c4caa12dafbcba138e5450b7af17b0b2194 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Mar 2017 18:39:56 +0300 Subject: perf/core, pt, bts: Get rid of itrace_started I just noticed that hw.itrace_started and hw.config are aliased to the same location. Now, the PT driver happens to use both, which works out fine by sheer luck: - STORE(hw.itrace_start) is ordered before STORE(hw.config), in the program order, although there are no compiler barriers to ensure that, - to the perf_log_itrace_start() hw.itrace_start looks set at the same time as when it is intended to be set because both stores happen in the same path, - hw.config is never reset to zero in the PT driver. Now, the use of hw.config by the PT driver makes more sense (it being a HW PMU) than messing around with itrace_started, which is an awkward API to begin with. This patch replaces hw.itrace_started with an attach_state bit and an API call for the PMU drivers to use to communicate the condition. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170330153956.25994-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c00cd4b02f32..adda0aaae6c8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -147,9 +147,6 @@ struct hw_perf_event { struct list_head cqm_groups_entry; struct list_head cqm_group_entry; }; - struct { /* itrace */ - int itrace_started; - }; struct { /* amd_power */ u64 pwr_acc; u64 ptsc; @@ -541,6 +538,7 @@ struct swevent_hlist { #define PERF_ATTACH_GROUP 0x02 #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 +#define PERF_ATTACH_ITRACE 0x10 struct perf_cgroup; struct ring_buffer; @@ -864,6 +862,7 @@ extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void *perf_get_aux(struct perf_output_handle *handle); extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); +extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); -- cgit From fc7ce9c74c3ad232b084d80148654f926d01ece7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 28 Aug 2017 20:52:49 -0400 Subject: perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR For understanding how the workload maps to memory channels and hardware behavior, it's very important to collect address maps with physical addresses. For example, 3D XPoint access can only be found by filtering the physical address. Add a new sample type for physical address. perf already has a facility to collect data virtual address. This patch introduces a function to convert the virtual address to physical address. The function is quite generic and can be extended to any architecture as long as a virtual address is provided. - For kernel direct mapping addresses, virt_to_phys is used to convert the virtual addresses to physical address. - For user virtual addresses, __get_user_pages_fast is used to walk the pages tables for user physical address. - This does not work for vmalloc addresses right now. These are not resolved, but code to do that could be added. The new sample type requires collecting the virtual address. The virtual address will not be output unless SAMPLE_ADDR is applied. For security, the physical address can only be exposed to root or privileged user. Tested-by: Madhavan Srinivasan Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: mpe@ellerman.id.au Link: http://lkml.kernel.org/r/1503967969-48278-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index adda0aaae6c8..718ba163c1b9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -943,6 +943,8 @@ struct perf_sample_data { struct perf_regs regs_intr; u64 stack_user_size; + + u64 phys_addr; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2a37ae925d85..140ae638cfd6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -139,8 +139,9 @@ enum perf_event_sample_format { PERF_SAMPLE_IDENTIFIER = 1U << 16, PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, + PERF_SAMPLE_PHYS_ADDR = 1U << 19, - PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ }; /* @@ -814,6 +815,7 @@ enum perf_event_type { * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR + * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * }; */ PERF_RECORD_SAMPLE = 9, -- cgit From f52be5708076b75a045ac52c6fef3fffb8300525 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 Aug 2017 10:59:39 +0200 Subject: locking/lockdep: Untangle xhlock history save/restore from task independence Where XHLOCK_{SOFT,HARD} are save/restore points in the xhlocks[] to ensure the temporal IRQ events don't interact with task state, the XHLOCK_PROC is a fundament different beast that just happens to share the interface. The purpose of XHLOCK_PROC is to annotate independent execution inside one task. For example workqueues, each work should appear to run in its own 'pristine' 'task'. Remove XHLOCK_PROC in favour of its own interface to avoid confusion. Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boqun.feng@gmail.com Cc: david@fromorbit.com Cc: johannes@sipsolutions.net Cc: kernel-team@lge.com Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20170829085939.ggmb6xiohw67micb@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 4 ++-- include/linux/lockdep.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 9bc050bc81b2..5fdd93bb9300 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -26,7 +26,7 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD, 0);\ + crossrelease_hist_start(XHLOCK_HARD); \ } while (0) # define trace_hardirq_exit() \ do { \ @@ -36,7 +36,7 @@ do { \ # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT, 0);\ + crossrelease_hist_start(XHLOCK_SOFT); \ } while (0) # define lockdep_softirq_exit() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 78bb7133abed..bfa8e0b0d6f1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -551,7 +551,6 @@ struct pin_cookie { }; enum xhlock_context_t { XHLOCK_HARD, XHLOCK_SOFT, - XHLOCK_PROC, XHLOCK_CTX_NR, }; @@ -580,8 +579,9 @@ extern void lock_commit_crosslock(struct lockdep_map *lock); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), .cross = 0, } -extern void crossrelease_hist_start(enum xhlock_context_t c, bool force); +extern void crossrelease_hist_start(enum xhlock_context_t c); extern void crossrelease_hist_end(enum xhlock_context_t c); +extern void lockdep_invariant_state(bool force); extern void lockdep_init_task(struct task_struct *task); extern void lockdep_free_task(struct task_struct *task); #else /* !CROSSRELEASE */ @@ -593,8 +593,9 @@ extern void lockdep_free_task(struct task_struct *task); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c, bool force) {} +static inline void crossrelease_hist_start(enum xhlock_context_t c) {} static inline void crossrelease_hist_end(enum xhlock_context_t c) {} +static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} #endif /* CROSSRELEASE */ -- cgit From 966a967116e699762dbf4af7f9e0d1955c25aa37 Mon Sep 17 00:00:00 2001 From: Ying Huang Date: Tue, 8 Aug 2017 12:30:00 +0800 Subject: smp: Avoid using two cache lines for struct call_single_data struct call_single_data is used in IPIs to transfer information between CPUs. Its size is bigger than sizeof(unsigned long) and less than cache line size. Currently it is not allocated with any explicit alignment requirements. This makes it possible for allocated call_single_data to cross two cache lines, which results in double the number of the cache lines that need to be transferred among CPUs. This can be fixed by requiring call_single_data to be aligned with the size of call_single_data. Currently the size of call_single_data is the power of 2. If we add new fields to call_single_data, we may need to add padding to make sure the size of new definition is the power of 2 as well. Fortunately, this is enforced by GCC, which will report bad sizes. To set alignment requirements of call_single_data to the size of call_single_data, a struct definition and a typedef is used. To test the effect of the patch, I used the vm-scalability multiple thread swap test case (swap-w-seq-mt). The test will create multiple threads and each thread will eat memory until all RAM and part of swap is used, so that huge number of IPIs are triggered when unmapping memory. In the test, the throughput of memory writing improves ~5% compared with misaligned call_single_data, because of faster IPIs. Suggested-by: Peter Zijlstra Signed-off-by: Huang, Ying [ Add call_single_data_t and align with size of call_single_data. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Aaron Lu Cc: Borislav Petkov Cc: Eric Dumazet Cc: Juergen Gross Cc: Linus Torvalds Cc: Michael Ellerman Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/87bmnqd6lz.fsf@yhuang-mobile.sh.intel.com Signed-off-by: Ingo Molnar --- include/linux/blkdev.h | 2 +- include/linux/netdevice.h | 2 +- include/linux/smp.h | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..006fa09a641e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -134,7 +134,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - struct call_single_data csd; + call_single_data_t csd; u64 fifo_time; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..6557f320b66e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2774,7 +2774,7 @@ struct softnet_data { unsigned int input_queue_head ____cacheline_aligned_in_smp; /* Elements below can be accessed between CPUs for RPS/RFS */ - struct call_single_data csd ____cacheline_aligned_in_smp; + call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; unsigned int input_queue_tail; diff --git a/include/linux/smp.h b/include/linux/smp.h index 68123c1fe549..98b1fe027fc9 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -14,13 +14,17 @@ #include typedef void (*smp_call_func_t)(void *info); -struct call_single_data { +struct __call_single_data { struct llist_node llist; smp_call_func_t func; void *info; unsigned int flags; }; +/* Use __aligned() to avoid to use 2 cache lines for 1 csd */ +typedef struct __call_single_data call_single_data_t + __aligned(sizeof(struct __call_single_data)); + /* total number of cpus in this system (may exceed NR_CPUS) */ extern unsigned int total_cpus; @@ -48,7 +52,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int smp_call_function_single_async(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, call_single_data_t *csd); #ifdef CONFIG_SMP -- cgit From ec81048cc340bb03334e6ca62661ecc0a684897a Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 23 Aug 2017 23:25:38 +0800 Subject: sched/completion: Avoid unnecessary stack allocation for COMPLETION_INITIALIZER_ONSTACK() In theory, COMPLETION_INITIALIZER_ONSTACK() should never affect the stack allocation of the caller. However, on some compilers, a temporary structure was allocated for the return value of COMPLETION_INITIALIZER_ONSTACK(). For example in write_journal() with LOCKDEP_COMPLETIONS=y (GCC is 7.1.1): io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); 2462: e8 00 00 00 00 callq 2467 2467: 48 8d 85 80 fd ff ff lea -0x280(%rbp),%rax 246e: 48 c7 c6 00 00 00 00 mov $0x0,%rsi 2475: 48 c7 c2 00 00 00 00 mov $0x0,%rdx x->done = 0; 247c: c7 85 90 fd ff ff 00 movl $0x0,-0x270(%rbp) 2483: 00 00 00 init_waitqueue_head(&x->wait); 2486: 48 8d 78 18 lea 0x18(%rax),%rdi 248a: e8 00 00 00 00 callq 248f if (commit_start + commit_sections <= ic->journal_sections) { 248f: 41 8b 87 a8 00 00 00 mov 0xa8(%r15),%eax io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); 2496: 48 8d bd e8 f9 ff ff lea -0x618(%rbp),%rdi 249d: 48 8d b5 90 fd ff ff lea -0x270(%rbp),%rsi 24a4: b9 17 00 00 00 mov $0x17,%ecx 24a9: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) if (commit_start + commit_sections <= ic->journal_sections) { 24ac: 41 39 c6 cmp %eax,%r14d io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp); 24af: 48 8d bd 90 fd ff ff lea -0x270(%rbp),%rdi 24b6: 48 8d b5 e8 f9 ff ff lea -0x618(%rbp),%rsi 24bd: b9 17 00 00 00 mov $0x17,%ecx 24c2: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi) We can obviously see the temporary structure allocated, and the compiler also does two meaningless memcpy with "rep movsq". And according to: https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html#Statement-Exprs The return value of a statement expression is returned by value, so the temporary variable is created in COMPLETION_INITIALIZER_ONSTACK(), and that's why the temporary structures are allocted. To fix this, make the brace block in COMPLETION_INITIALIZER_ONSTACK() return a pointer and dereference it outside the block rather than return the whole structure, in this way, we are able to teach the compiler not to do the unnecessary stack allocation. This could also reduce the stack size even if !LOCKDEP, for example in write_journal(), compiled with gcc 7.1.1, the result of command: objdump -d drivers/md/dm-integrity.o | ./scripts/checkstack.pl x86 before: 0x0000246a write_journal [dm-integrity.o]: 696 after: 0x00002b7a write_journal [dm-integrity.o]: 296 Reported-by: Arnd Bergmann Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Acked-by: Arnd Bergmann Cc: Andrew Morton Cc: Byungchul Park Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/20170823152542.5150-3-boqun.feng@gmail.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/completion.h b/include/linux/completion.h index 791f053f28b7..cae5400022a3 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -74,7 +74,7 @@ static inline void complete_release_commit(struct completion *x) {} #endif #define COMPLETION_INITIALIZER_ONSTACK(work) \ - ({ init_completion(&work); work; }) + (*({ init_completion(&work); &work; })) /** * DECLARE_COMPLETION - declare and initialize a completion structure -- cgit From e8f11db956aa09c1618051a7aaf367d6810d8d8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Aug 2017 14:42:06 +0200 Subject: libata: check for trusted computing in IDENTIFY DEVICE data ATA-8 and later mirrors the TRUSTED COMPUTING SUPPORTED bit in word 48 of the IDENTIFY DEVICE data. Check this before issuing a READ LOG PAGE command to avoid issues with buggy devices. The only downside is that we can't support Security Send / Receive for a device with an older revision due to the conflicting use of this field in earlier specifications. tj: The reason we need this is because some devices which don't support READ LOG PAGE lock up after getting issued that command. Signed-off-by: Christoph Hellwig Tested-by: David Ahern Signed-off-by: Tejun Heo --- include/linux/ata.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index e65ae4b2ed48..c7a353825450 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -60,7 +60,8 @@ enum { ATA_ID_FW_REV = 23, ATA_ID_PROD = 27, ATA_ID_MAX_MULTSECT = 47, - ATA_ID_DWORD_IO = 48, + ATA_ID_DWORD_IO = 48, /* before ATA-8 */ + ATA_ID_TRUSTED = 48, /* ATA-8 and later */ ATA_ID_CAPABILITY = 49, ATA_ID_OLD_PIO_MODES = 51, ATA_ID_OLD_DMA_MODES = 52, @@ -889,6 +890,13 @@ static inline bool ata_id_has_dword_io(const u16 *id) return id[ATA_ID_DWORD_IO] & (1 << 0); } +static inline bool ata_id_has_trusted(const u16 *id) +{ + if (ata_id_major_version(id) <= 7) + return false; + return id[ATA_ID_TRUSTED] & (1 << 0); +} + static inline bool ata_id_has_unload(const u16 *id) { if (ata_id_major_version(id) >= 7 && -- cgit From 2aca392398384f8a2f339f4c661a1699f4f2e2eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Aug 2017 08:36:58 -0700 Subject: Revert "libata: quirk read log on no-name M.2 SSD" This reverts commit 35f0b6a779b8b7a98faefd7c1c660b4dac9a5c26. We now conditionalize issuing of READ LOG PAGE on the TRUSTED COMPUTING SUPPORTED bit in the identity data and this shouldn't be necessary. Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9e927ae7fced..931c32f1f18d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -438,7 +438,6 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_HORKAGE_NO_LOG_PAGE = (1 << 26), /* Doesn't like Get Log Page */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit From 2d2a2b8c080ad8feab7ca87769dedb3c7a83a375 Mon Sep 17 00:00:00 2001 From: Lothar Waßmann Date: Tue, 29 Aug 2017 12:17:13 +0200 Subject: mtd: nand: complain loudly when chip->bits_per_cell is not correctly initialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chip->bits_per_cell which is used to determine the NAND cell type (SLC/MLC) should always have a value != 0. Complain loudly if the value is 0 in nand_is_slc() to catch use before correct initialization. Signed-off-by: Lothar Waßmann Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 8fb488d586d6..c433f093766a 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1234,6 +1234,8 @@ int onfi_init_data_interface(struct nand_chip *chip, */ static inline bool nand_is_slc(struct nand_chip *chip) { + WARN(chip->bits_per_cell == 0, + "chip->bits_per_cell is used uninitialized\n"); return chip->bits_per_cell == 1; } -- cgit From c31e5a4876cdfbfcc28c4fb201ad872c65671067 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:40 +0200 Subject: xdp: remove redundant argument to trace_xdp_redirect Supplying the action argument XDP_REDIRECT to the tracepoint xdp_redirect is redundant as it is only called in-case this action was specified. Remove the argument, but keep "act" member of the tracepoint struct and populate it with XDP_REDIRECT. This makes it easier to write a common bpf_prog processing events. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 27cf2ef35f5f..f684f3b36bca 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -52,10 +52,10 @@ TRACE_EVENT(xdp_exception, TRACE_EVENT(xdp_redirect, TP_PROTO(const struct net_device *dev, - const struct bpf_prog *xdp, u32 act, + const struct bpf_prog *xdp, int to_index, int err), - TP_ARGS(dev, xdp, act, to_index, err), + TP_ARGS(dev, xdp, to_index, err), TP_STRUCT__entry( __array(u8, prog_tag, 8) @@ -68,7 +68,7 @@ TRACE_EVENT(xdp_redirect, TP_fast_assign( BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); - __entry->act = act; + __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->to_index = to_index; __entry->err = err; -- cgit From 8d3b778ff544b369f0847e6c15f3e73057298aa4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:45 +0200 Subject: xdp: tracepoint xdp_redirect also need a map argument To make sense of the map index, the tracepoint user also need to know that map we are talking about. Supply the map pointer but only expose the map->id. The 'to_index' is renamed 'to_ifindex'. In the xdp_redirect_map case, this is the result of the devmap lookup. The map lookup key is exposed as map_index, which is needed to troubleshoot in case the lookup failed. The 'to_ifindex' is placed after 'err' to keep TP_STRUCT as common as possible. This also keeps the TP_STRUCT similar enough, that userspace can write a monitor program, that doesn't need to care about whether bpf_redirect or bpf_redirect_map were used. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index f684f3b36bca..573dcfa1aeaa 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -49,20 +49,23 @@ TRACE_EVENT(xdp_exception, __entry->ifindex) ); -TRACE_EVENT(xdp_redirect, +DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, const struct bpf_prog *xdp, - int to_index, int err), + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), - TP_ARGS(dev, xdp, to_index, err), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), TP_STRUCT__entry( __array(u8, prog_tag, 8) __field(u32, act) __field(int, ifindex) - __field(int, to_index) __field(int, err) + __field(int, to_ifindex) + __field(u32, map_id) + __field(int, map_index) ), TP_fast_assign( @@ -70,16 +73,35 @@ TRACE_EVENT(xdp_redirect, memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; - __entry->to_index = to_index; __entry->err = err; + __entry->to_ifindex = to_ifindex; + __entry->map_id = map ? map->id : 0; + __entry->map_index = map_index; ), - TP_printk("prog=%s action=%s ifindex=%d to_index=%d err=%d", + TP_printk("prog=%s action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", __print_hex_str(__entry->prog_tag, 8), __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), - __entry->ifindex, __entry->to_index, - __entry->err) + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) ); + +DEFINE_EVENT(xdp_redirect_template, xdp_redirect, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +); + +#define _trace_xdp_redirect(dev, xdp, to, err) \ + trace_xdp_redirect(dev, xdp, to, err, NULL, 0); + +#define trace_xdp_redirect_map(dev, xdp, fwd, err, map, idx) \ + trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); + #endif /* _TRACE_XDP_H */ #include -- cgit From b06337dfdb16bc3f668326b6a618c472c671182a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:51 +0200 Subject: xdp: make xdp tracepoints report bpf prog id instead of prog_tag Given previous patch expose the map_id, it seems natural to also report the bpf prog id. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 573dcfa1aeaa..89eba564e199 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -31,20 +31,19 @@ TRACE_EVENT(xdp_exception, TP_ARGS(dev, xdp, act), TP_STRUCT__entry( - __array(u8, prog_tag, 8) + __field(int, prog_id) __field(u32, act) __field(int, ifindex) ), TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); - memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __entry->prog_id = xdp->aux->id; __entry->act = act; __entry->ifindex = dev->ifindex; ), - TP_printk("prog=%s action=%s ifindex=%d", - __print_hex_str(__entry->prog_tag, 8), + TP_printk("prog_id=%d action=%s ifindex=%d", + __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex) ); @@ -59,7 +58,7 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), TP_STRUCT__entry( - __array(u8, prog_tag, 8) + __field(int, prog_id) __field(u32, act) __field(int, ifindex) __field(int, err) @@ -69,8 +68,7 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, ), TP_fast_assign( - BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag)); - memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag)); + __entry->prog_id = xdp->aux->id; __entry->act = XDP_REDIRECT; __entry->ifindex = dev->ifindex; __entry->err = err; @@ -79,9 +77,9 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, __entry->map_index = map_index; ), - TP_printk("prog=%s action=%s ifindex=%d to_ifindex=%d err=%d" + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" " map_id=%d map_index=%d", - __print_hex_str(__entry->prog_tag, 8), + __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, __entry->err, -- cgit From f5836ca5e9867fa6ab88cadb9873af56d9ceb589 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:37:56 +0200 Subject: xdp: separate xdp_redirect tracepoint in error case There is a need to separate the xdp_redirect tracepoint into two tracepoints, for separating the error case from the normal forward case. Due to the extreme speeds XDP is operating at, loading a tracepoint have a measurable impact. Single core XDP REDIRECT (ethtool tuned rx-usecs 25) can do 13.7 Mpps forwarding, but loading a simple bpf_prog at the tracepoint (with a return 0) reduce perf to 10.2 Mpps (CPU E5-1650 v4 @ 3.60GHz, driver: ixgbe) The overhead of loading a bpf-based tracepoint can be calculated to cost 25 nanosec ((1/13782002-1/10267937)*10^9 = -24.83 ns). Using perf record on the tracepoint event, with a non-matching --filter expression, the overhead is much larger. Performance drops to 8.3 Mpps, cost 48 nanosec ((1/13782002-1/8312497)*10^9 = -47.74)) Having a separate tracepoint for err cases, which should be less frequent, allow running a continuous monitor for errors while not affecting the redirect forward performance (this have also been verified by measurements). Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 89eba564e199..1eebad55ebd4 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -94,11 +94,25 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect, TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) ); -#define _trace_xdp_redirect(dev, xdp, to, err) \ - trace_xdp_redirect(dev, xdp, to, err, NULL, 0); +DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) +); + +#define _trace_xdp_redirect(dev, xdp, to) \ + trace_xdp_redirect(dev, xdp, to, 0, NULL, 0); + +#define _trace_xdp_redirect_err(dev, xdp, to, err) \ + trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); + +#define trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ + trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, 0, map, idx); -#define trace_xdp_redirect_map(dev, xdp, fwd, err, map, idx) \ - trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); +#define trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ + trace_xdp_redirect_err(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); #endif /* _TRACE_XDP_H */ -- cgit From 59a308967589f5b3f1f42793ab49bc2e18069769 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 29 Aug 2017 16:38:01 +0200 Subject: xdp: separate xdp_redirect tracepoint in map case Creating as specific xdp_redirect_map variant of the xdp tracepoints allow users to write simpler/faster BPF progs that get attached to these tracepoints. Goal is to still keep the tracepoints in xdp_redirect and xdp_redirect_map similar enough, that a tool can read the top part of the TP_STRUCT and produce similar monitor statistics. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1eebad55ebd4..862575ac8da9 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -77,13 +77,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template, __entry->map_index = map_index; ), - TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" - " map_id=%d map_index=%d", + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d", __entry->prog_id, __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), __entry->ifindex, __entry->to_ifindex, - __entry->err, - __entry->map_id, __entry->map_index) + __entry->err) ); DEFINE_EVENT(xdp_redirect_template, xdp_redirect, @@ -108,11 +106,43 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, #define _trace_xdp_redirect_err(dev, xdp, to, err) \ trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); -#define trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ - trace_xdp_redirect(dev, xdp, fwd ? fwd->ifindex : 0, 0, map, idx); +DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", + __entry->prog_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) +); + +DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, + TP_PROTO(const struct net_device *dev, + const struct bpf_prog *xdp, + int to_ifindex, int err, + const struct bpf_map *map, u32 map_index), + TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" + " map_id=%d map_index=%d", + __entry->prog_id, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->ifindex, __entry->to_ifindex, + __entry->err, + __entry->map_id, __entry->map_index) +); + +#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ + trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ + 0, map, idx); -#define trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ - trace_xdp_redirect_err(dev, xdp, fwd ? fwd->ifindex : 0, err, map, idx); +#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ + trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ + err, map, idx); #endif /* _TRACE_XDP_H */ -- cgit From f5808ac158f2b16b686a3d3c0879c5d6048aba14 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 28 Aug 2017 09:47:11 +0930 Subject: leds: gpio: Allow LED to retain state at shutdown In some systems, such as Baseboard Management Controllers (BMCs), we want to retain the state of LEDs across a reboot of the BMC (whilst the host remains up). Implement support for the retain-state-shutdown devicetree property in leds-gpio. Signed-off-by: Andrew Jeffery Acked-by: Pavel Machek Tested-by: Brandon Wyman Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 64c56d454f7d..bf6db4fe895b 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -49,6 +49,7 @@ struct led_classdev { #define LED_HW_PLUGGABLE (1 << 19) #define LED_PANIC_INDICATOR (1 << 20) #define LED_BRIGHT_HW_CHANGED (1 << 21) +#define LED_RETAIN_AT_SHUTDOWN (1 << 22) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; @@ -392,6 +393,7 @@ struct gpio_led { unsigned retain_state_suspended : 1; unsigned panic_indicator : 1; unsigned default_state : 2; + unsigned retain_state_shutdown : 1; /* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */ struct gpio_desc *gpiod; }; -- cgit From a92e145059cb883155a24a2d3ac33296d33d9df7 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 23 Aug 2017 15:17:47 -0400 Subject: drm/ttm: Add DMA map/unmap tracepoint (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also exports two functions that vendor drivers can call to trace DMA mappings. This is meant to help translate IOMMU mappings of bus addresses back to physical pages. Used by the umr amdgpu debugger for instance. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher (v2): Use dev_name() to get PCI path instead. (v3): Use correct types for dma/phys addresses --- include/drm/ttm/ttm_debug.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/drm/ttm/ttm_debug.h (limited to 'include') diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h new file mode 100644 index 000000000000..b5e460fa5086 --- /dev/null +++ b/include/drm/ttm/ttm_debug.h @@ -0,0 +1,31 @@ +/************************************************************************** + * + * Copyright (c) 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Tom St Denis + */ +extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt); +extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt); -- cgit From a4dec819c8bba6365eb893a4ca88db4dd1210110 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 18 Aug 2017 10:04:57 -0400 Subject: drm/ttm: Add helper functions to populate/map in one call (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions replace a section of common code found in radeon/amdgpu drivers (and possibly others) as part of the ttm_tt_*populate() callbacks. v2: squash in fix for sw iommu from Tom Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_page_alloc.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 49a828425fa2..bf21166f2b97 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -83,6 +83,17 @@ extern int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); extern int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); extern void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); + +/** + * Populates and DMA maps pages to fullfil a ttm_dma_populate() request + */ +int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt); + +/** + * Unpopulates and DMA unmaps pages as part of a + * ttm_dma_unpopulate() request */ +void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt); + #else static inline int ttm_dma_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) @@ -105,6 +116,16 @@ static inline void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) { } + +static inline int ttm_populate_and_map_pages(struct device *dev, struct ttm_dma_tt *tt) +{ + return -ENOMEM; +} + +static inline void ttm_unmap_and_unpopulate_pages(struct device *dev, struct ttm_dma_tt *tt) +{ +} + #endif #endif -- cgit From 96bec198352799794b0f8937620e811ef8b9fa22 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 24 Aug 2017 06:46:39 -0400 Subject: drm/ttm: Remove needless 'extern' on functions in header. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minor tidy up. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_page_alloc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index bf21166f2b97..38a2b4770c35 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -47,7 +47,7 @@ void ttm_page_alloc_fini(void); * * Add backing pages to all of @ttm */ -extern int ttm_pool_populate(struct ttm_tt *ttm); +int ttm_pool_populate(struct ttm_tt *ttm); /** * ttm_pool_unpopulate: @@ -56,12 +56,12 @@ extern int ttm_pool_populate(struct ttm_tt *ttm); * * Free all pages of @ttm */ -extern void ttm_pool_unpopulate(struct ttm_tt *ttm); +void ttm_pool_unpopulate(struct ttm_tt *ttm); /** * Output the state of pools to debugfs file */ -extern int ttm_page_alloc_debugfs(struct seq_file *m, void *data); +int ttm_page_alloc_debugfs(struct seq_file *m, void *data); #if defined(CONFIG_SWIOTLB) || defined(CONFIG_INTEL_IOMMU) @@ -78,10 +78,10 @@ void ttm_dma_page_alloc_fini(void); /** * Output the state of pools to debugfs file */ -extern int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); +int ttm_dma_page_alloc_debugfs(struct seq_file *m, void *data); -extern int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); -extern void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); +int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev); +void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev); /** -- cgit From 52c9285d47459cf241e144c7d8ef15941ba1b181 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 18 Aug 2017 20:27:56 +0530 Subject: PCI: endpoint: Add support for configurable page size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci-epc-mem uses a page size equal to *PAGE_SIZE* (usually 4KB) to manage the address space. However certain platforms like TI's K2G have a restriction that this address space should be either divided into 1MB/2MB/4MB or 8MB sizes (Ref: 11.14.4.9.1 Outbound Address Translation in K2G TRM SPRUHY8F January 2016 – Revised May 2017). Add support to handle different page sizes here. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas --- include/linux/pci-epc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index af5edbf3eea3..f7a04e1af112 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -62,11 +62,13 @@ struct pci_epc_ops { * @size: the size of the PCI address space * @bitmap: bitmap to manage the PCI address space * @pages: number of bits representing the address region + * @page_size: size of each page */ struct pci_epc_mem { phys_addr_t phys_base; size_t size; unsigned long *bitmap; + size_t page_size; int pages; }; @@ -98,6 +100,9 @@ struct pci_epc { #define devm_pci_epc_create(dev, ops) \ __devm_pci_epc_create((dev), (ops), THIS_MODULE) +#define pci_epc_mem_init(epc, phys_addr, size) \ + __pci_epc_mem_init((epc), (phys_addr), (size), PAGE_SIZE) + static inline void epc_set_drvdata(struct pci_epc *epc, void *data) { dev_set_drvdata(&epc->dev, data); @@ -135,7 +140,8 @@ void pci_epc_stop(struct pci_epc *epc); struct pci_epc *pci_epc_get(const char *epc_name); void pci_epc_put(struct pci_epc *epc); -int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size); +int __pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size, + size_t page_size); void pci_epc_mem_exit(struct pci_epc *epc); void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); -- cgit From 9de7e14a1a9c6bc4f9be6ccd9b951341a80dbd52 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 29 Aug 2017 10:20:38 +0200 Subject: drbd: new disk-option disable-write-same Some backend devices claim to support write-same, but would fail actual write-same requests. Allow to set (or toggle) whether or not DRBD tries to support write-same. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- include/linux/drbd_genl.h | 3 ++- include/linux/drbd_limits.h | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index 2896f93808ae..4e6d4d4c7056 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -132,7 +132,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) __flg_field_def(23, 0 /* OPTIONAL */, al_updates, DRBD_AL_UPDATES_DEF) - __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED) + __flg_field_def(24, 0 /* OPTIONAL */, discard_zeroes_if_aligned, DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF) + __flg_field_def(26, 0 /* OPTIONAL */, disable_write_same, DRBD_DISABLE_WRITE_SAME_DEF) ) GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index ddac68422a96..24ae1b9b76c7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -209,12 +209,18 @@ #define DRBD_MD_FLUSHES_DEF 1 #define DRBD_TCP_CORK_DEF 1 #define DRBD_AL_UPDATES_DEF 1 + /* We used to ignore the discard_zeroes_data setting. * To not change established (and expected) behaviour, * by default assume that, for discard_zeroes_data=0, * we can make that an effective discard_zeroes_data=1, * if we only explicitly zero-out unaligned partial chunks. */ -#define DRBD_DISCARD_ZEROES_IF_ALIGNED 1 +#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1 + +/* Some backends pretend to support WRITE SAME, + * but fail such requests when they are actually submitted. + * This is to tell DRBD to not even try. */ +#define DRBD_DISABLE_WRITE_SAME_DEF 0 #define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 #define DRBD_ALWAYS_ASBP_DEF 0 -- cgit From 365cf663b64791e341f425385c7ae152327c7009 Mon Sep 17 00:00:00 2001 From: Roland Kammerer Date: Tue, 29 Aug 2017 10:20:48 +0200 Subject: drbd: switch from kmalloc() to kmalloc_array() We had one call to kmalloc that actually allocates an array. Switch that one to the kmalloc_array() function. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 002611c85318..2d0259327721 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -51,7 +51,7 @@ #endif extern const char *drbd_buildtag(void); -#define REL_VERSION "8.4.7" +#define REL_VERSION "8.4.10" #define API_VERSION 1 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 101 -- cgit From b74fd306ef2d48781e807dd506cca818bb83d749 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 29 Aug 2017 13:16:57 -0700 Subject: bridge: fdb add and delete tracepoints A few useful tracepoints to trace bridge forwarding database updates. Signed-off-by: Roopa Prabhu Reviewed-by: Florian Fainelli Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/trace/events/bridge.h | 98 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 include/trace/events/bridge.h (limited to 'include') diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h new file mode 100644 index 000000000000..0f1cde00af11 --- /dev/null +++ b/include/trace/events/bridge.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bridge + +#if !defined(_TRACE_BRIDGE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BRIDGE_H + +#include +#include + +#include "../../../net/bridge/br_private.h" + +TRACE_EVENT(br_fdb_add, + + TP_PROTO(struct ndmsg *ndm, struct net_device *dev, + const unsigned char *addr, u16 vid, u16 nlh_flags), + + TP_ARGS(ndm, dev, addr, vid, nlh_flags), + + TP_STRUCT__entry( + __field(u8, ndm_flags) + __string(dev, dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(u16, nlh_flags) + ), + + TP_fast_assign( + __assign_str(dev, dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->nlh_flags = nlh_flags; + __entry->ndm_flags = ndm->ndm_flags; + ), + + TP_printk("dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u nlh_flags %04x ndm_flags %02x", + __get_str(dev), __entry->addr[0], __entry->addr[1], + __entry->addr[2], __entry->addr[3], __entry->addr[4], + __entry->addr[5], __entry->vid, + __entry->nlh_flags, __entry->ndm_flags) +); + +TRACE_EVENT(br_fdb_external_learn_add, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid), + + TP_ARGS(br, p, addr, vid), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, p ? p->dev->name : "null") + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, p ? p->dev->name : "null"); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + ), + + TP_printk("br_dev %s port %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid) +); + +TRACE_EVENT(fdb_delete, + + TP_PROTO(struct net_bridge *br, struct net_bridge_fdb_entry *f), + + TP_ARGS(br, f), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, f->dst ? f->dst->dev->name : "null") + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, f->dst ? f->dst->dev->name : "null"); + memcpy(__entry->addr, f->addr.addr, ETH_ALEN); + __entry->vid = f->vlan_id; + ), + + TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid) +); + +#endif /* _TRACE_BRIDGE_H */ + +/* This part must be outside protection */ +#include -- cgit From c73c8b1e47ca275388f774fd36560ab7e994e99e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 28 Aug 2017 16:38:20 +0300 Subject: net/mlx4_core: Dynamically allocate structs at mlx4_slave_cap In order to avoid temporary large structs on the stack, allocate them dynamically. Signed-off-by: Eran Ben Elisha Signed-off-by: Tal Alon Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b54517c05e9a..9844606f9491 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -518,6 +518,14 @@ struct mlx4_phys_caps { u32 base_tunnel_sqpn; }; +struct mlx4_spec_qps { + u32 qp0_qkey; + u32 qp0_proxy; + u32 qp0_tunnel; + u32 qp1_proxy; + u32 qp1_tunnel; +}; + struct mlx4_caps { u64 fw_ver; u32 function; @@ -547,11 +555,7 @@ struct mlx4_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int max_tc_eth; - u32 *qp0_qkey; - u32 *qp0_proxy; - u32 *qp1_proxy; - u32 *qp0_tunnel; - u32 *qp1_tunnel; + struct mlx4_spec_qps *spec_qps; int num_srqs; int max_srq_wqes; int max_srq_sge; -- cgit From be59960395f86991c6599c41d8c421fe4bf7a210 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Aug 2017 16:38:23 +0300 Subject: net/mlx4: Add user mac FW update support Adding support for updating the FW on new port mac, when port mac change is requested by the user. This info is required by the FW as OEM management tools require this info directly from the NIC FW. Check device capability bit to verify the FW supports user mac. If the FW does support it, use set_port command to notify the FW on the new mac. The feature is relevant only to PF port mac. Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9844606f9491..89c0e7f7cd9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -224,6 +224,7 @@ enum { MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37, + MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38, }; enum { @@ -1377,6 +1378,7 @@ int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac); int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); -- cgit From 2804fd3af6ba5ae5737705b27146455eabe2e2f8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 28 Aug 2017 15:03:13 -0400 Subject: if_ether: add forces ife lfb type This patch adds the forces IFE lfb type according to IEEE registered ethertypes. See http://standards-oui.ieee.org/ethertype/eth.txt for more information. Since there exists the IFE subsystem it can be used there. This patch also use the correct word "ForCES" instead of "FoRCES" which is a spelling error inside the IEEE ethertype specification. Signed-off-by: Alexander Aring Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index efeb1190c2ca..f68f6bf4a253 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -104,6 +104,7 @@ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ +#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */ #define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value -- cgit From 155e6f649757c902901e599c268f8b575ddac1f8 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 28 Aug 2017 21:43:21 +0200 Subject: ether: add NSH ethertype The NSH draft says: An IEEE EtherType, 0x894F, has been allocated for NSH. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index f68f6bf4a253..61f7ccce5b69 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -99,6 +99,7 @@ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */ #define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */ +#define ETH_P_NSH 0x894F /* Network Service Header */ #define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */ #define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */ -- cgit From fa20e0e32cb3dfc1760b6254b64977f2fb5bd851 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Mon, 28 Aug 2017 21:43:22 +0200 Subject: vxlan: factor out VXLAN-GPE next protocol The values are shared between VXLAN-GPE and NSH. Originally probably by coincidence but I notified both working groups about this last year and they seem to keep the values in sync since then. Hopefully they'll get a single IANA registry for the values, too. (I asked them for that.) Factor out the code to be shared by the NSH implementation. NSH and MPLS values are added in this patch, too. For MPLS, the drafts incorrectly assign only a single value, while we have two MPLS ethertypes. I raised the problem with both groups. For now, I assume the value is for unicast. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/tun_proto.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/net/vxlan.h | 6 ------ 2 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 include/net/tun_proto.h (limited to 'include') diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h new file mode 100644 index 000000000000..2ea3deba4c99 --- /dev/null +++ b/include/net/tun_proto.h @@ -0,0 +1,49 @@ +#ifndef __NET_TUN_PROTO_H +#define __NET_TUN_PROTO_H + +#include + +/* One byte protocol values as defined by VXLAN-GPE and NSH. These will + * hopefully get a shared IANA registry. + */ +#define TUN_P_IPV4 0x01 +#define TUN_P_IPV6 0x02 +#define TUN_P_ETHERNET 0x03 +#define TUN_P_NSH 0x04 +#define TUN_P_MPLS_UC 0x05 + +static inline __be16 tun_p_to_eth_p(u8 proto) +{ + switch (proto) { + case TUN_P_IPV4: + return htons(ETH_P_IP); + case TUN_P_IPV6: + return htons(ETH_P_IPV6); + case TUN_P_ETHERNET: + return htons(ETH_P_TEB); + case TUN_P_NSH: + return htons(ETH_P_NSH); + case TUN_P_MPLS_UC: + return htons(ETH_P_MPLS_UC); + } + return 0; +} + +static inline u8 tun_p_from_eth_p(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + return TUN_P_IPV4; + case htons(ETH_P_IPV6): + return TUN_P_IPV6; + case htons(ETH_P_TEB): + return TUN_P_ETHERNET; + case htons(ETH_P_NSH): + return TUN_P_NSH; + case htons(ETH_P_MPLS_UC): + return TUN_P_MPLS_UC; + } + return 0; +} + +#endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 3f430e38ab82..4e3876dde295 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -168,12 +168,6 @@ reserved_flags2:2; #define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ cpu_to_be32(0xff)) -/* VXLAN-GPE header Next Protocol. */ -#define VXLAN_GPE_NP_IPV4 0x01 -#define VXLAN_GPE_NP_IPV6 0x02 -#define VXLAN_GPE_NP_ETHERNET 0x03 -#define VXLAN_GPE_NP_NSH 0x04 - struct vxlan_metadata { u32 gbp; }; -- cgit From 1f0b7744c50573df464ca33d8e5275be509f852b Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Mon, 28 Aug 2017 21:43:23 +0200 Subject: net: add NSH header structures and helpers NSH (Network Service Header)[1] is a new protocol for service function chaining, it can be handled as a L3 protocol like IPv4 and IPv6, Eth + NSH + Inner packet or VxLAN-gpe + NSH + Inner packet are two typical use cases. This patch adds NSH header structures and helpers for NSH GSO support and Open vSwitch NSH support. [1] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ [Jiri: added nsh_hdr() helper and renamed the header struct to "struct nshhdr" to match the usual pattern. Removed packet type defines, these are now shared with VXLAN-GPE.] Signed-off-by: Yi Yang Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/nsh.h | 307 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 307 insertions(+) create mode 100644 include/net/nsh.h (limited to 'include') diff --git a/include/net/nsh.h b/include/net/nsh.h new file mode 100644 index 000000000000..a1eaea20be96 --- /dev/null +++ b/include/net/nsh.h @@ -0,0 +1,307 @@ +#ifndef __NET_NSH_H +#define __NET_NSH_H 1 + +#include + +/* + * Network Service Header: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Ver|O|U| TTL | Length |U|U|U|U|MD Type| Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Service Path Identifier (SPI) | Service Index | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * ~ Mandatory/Optional Context Headers ~ + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Version: The version field is used to ensure backward compatibility + * going forward with future NSH specification updates. It MUST be set + * to 0x0 by the sender, in this first revision of NSH. Given the + * widespread implementation of existing hardware that uses the first + * nibble after an MPLS label stack for ECMP decision processing, this + * document reserves version 01b and this value MUST NOT be used in + * future versions of the protocol. Please see [RFC7325] for further + * discussion of MPLS-related forwarding requirements. + * + * O bit: Setting this bit indicates an Operations, Administration, and + * Maintenance (OAM) packet. The actual format and processing of SFC + * OAM packets is outside the scope of this specification (see for + * example [I-D.ietf-sfc-oam-framework] for one approach). + * + * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM + * packets. The O bit MUST NOT be modified along the SFP. + * + * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC + * OAM procedures SHOULD discard packets with O bit set, but MAY support + * a configurable parameter to enable forwarding received SFC OAM + * packets unmodified to the next element in the chain. Forwarding OAM + * packets unmodified by SFC elements that do not support SFC OAM + * procedures may be acceptable for a subset of OAM functions, but can + * result in unexpected outcomes for others, thus it is recommended to + * analyze the impact of forwarding an OAM packet for all OAM functions + * prior to enabling this behavior. The configurable parameter MUST be + * disabled by default. + * + * TTL: Indicates the maximum SFF hops for an SFP. This field is used + * for service plane loop detection. The initial TTL value SHOULD be + * configurable via the control plane; the configured initial value can + * be specific to one or more SFPs. If no initial value is explicitly + * provided, the default initial TTL value of 63 MUST be used. Each SFF + * involved in forwarding an NSH packet MUST decrement the TTL value by + * 1 prior to NSH forwarding lookup. Decrementing by 1 from an incoming + * value of 0 shall result in a TTL value of 63. The packet MUST NOT be + * forwarded if TTL is, after decrement, 0. + * + * All other flag fields, marked U, are unassigned and available for + * future use, see Section 11.2.1. Unassigned bits MUST be set to zero + * upon origination, and MUST be ignored and preserved unmodified by + * other NSH supporting elements. Elements which do not understand the + * meaning of any of these bits MUST NOT modify their actions based on + * those unknown bits. + * + * Length: The total length, in 4-byte words, of NSH including the Base + * Header, the Service Path Header, the Fixed Length Context Header or + * Variable Length Context Header(s). The length MUST be 0x6 for MD + * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to + * 0x2. The length of the NSH header MUST be an integer multiple of 4 + * bytes, thus variable length metadata is always padded out to a + * multiple of 4 bytes. + * + * MD Type: Indicates the format of NSH beyond the mandatory Base Header + * and the Service Path Header. MD Type defines the format of the + * metadata being carried. + * + * 0x0 - This is a reserved value. Implementations SHOULD silently + * discard packets with MD Type 0x0. + * + * 0x1 - This indicates that the format of the header includes a fixed + * length Context Header (see Figure 4 below). + * + * 0x2 - This does not mandate any headers beyond the Base Header and + * Service Path Header, but may contain optional variable length Context + * Header(s). The semantics of the variable length Context Header(s) + * are not defined in this document. The format of the optional + * variable length Context Headers is provided in Section 2.5.1. + * + * 0xF - This value is reserved for experimentation and testing, as per + * [RFC3692]. Implementations not explicitly configured to be part of + * an experiment SHOULD silently discard packets with MD Type 0xF. + * + * Next Protocol: indicates the protocol type of the encapsulated data. + * NSH does not alter the inner payload, and the semantics on the inner + * protocol remain unchanged due to NSH service function chaining. + * Please see the IANA Considerations section below, Section 11.2.5. + * + * This document defines the following Next Protocol values: + * + * 0x1: IPv4 + * 0x2: IPv6 + * 0x3: Ethernet + * 0x4: NSH + * 0x5: MPLS + * 0xFE: Experiment 1 + * 0xFF: Experiment 2 + * + * Packets with Next Protocol values not supported SHOULD be silently + * dropped by default, although an implementation MAY provide a + * configuration parameter to forward them. Additionally, an + * implementation not explicitly configured for a specific experiment + * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE + * and 0xFF. + * + * Service Path Identifier (SPI): Identifies a service path. + * Participating nodes MUST use this identifier for Service Function + * Path selection. The initial classifier MUST set the appropriate SPI + * for a given classification result. + * + * Service Index (SI): Provides location within the SFP. The initial + * classifier for a given SFP SHOULD set the SI to 255, however the + * control plane MAY configure the initial value of SI as appropriate + * (i.e., taking into account the length of the service function path). + * The Service Index MUST be decremented by a value of 1 by Service + * Functions or by SFC Proxy nodes after performing required services + * and the new decremented SI value MUST be used in the egress packet's + * NSH. The initial Classifier MUST send the packet to the first SFF in + * the identified SFP for forwarding along an SFP. If re-classification + * occurs, and that re-classification results in a new SPI, the + * (re)classifier is, in effect, the initial classifier for the + * resultant SPI. + * + * The SI is used in conjunction the with Service Path Identifier for + * Service Function Path Selection and for determining the next SFF/SF + * in the path. The SI is also valuable when troubleshooting or + * reporting service paths. Additionally, while the TTL field is the + * main mechanism for service plane loop detection, the SI can also be + * used for detecting service plane loops. + * + * When the Base Header specifies MD Type = 0x1, a Fixed Length Context + * Header (16-bytes) MUST be present immediately following the Service + * Path Header. The value of a Fixed Length Context + * Header that carries no metadata MUST be set to zero. + * + * When the base header specifies MD Type = 0x2, zero or more Variable + * Length Context Headers MAY be added, immediately following the + * Service Path Header (see Figure 5). Therefore, Length = 0x2, + * indicates that only the Base Header followed by the Service Path + * Header are present. The optional Variable Length Context Headers + * MUST be of an integer number of 4-bytes. The base header Length + * field MUST be used to determine the offset to locate the original + * packet or frame for SFC nodes that require access to that + * information. + * + * The format of the optional variable length Context Headers + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Metadata Class | Type |U| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Variable Metadata | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Metadata Class (MD Class): Defines the scope of the 'Type' field to + * provide a hierarchical namespace. The IANA Considerations + * Section 11.2.4 defines how the MD Class values can be allocated to + * standards bodies, vendors, and others. + * + * Type: Indicates the explicit type of metadata being carried. The + * definition of the Type is the responsibility of the MD Class owner. + * + * Unassigned bit: One unassigned bit is available for future use. This + * bit MUST NOT be set, and MUST be ignored on receipt. + * + * Length: Indicates the length of the variable metadata, in bytes. In + * case the metadata length is not an integer number of 4-byte words, + * the sender MUST add pad bytes immediately following the last metadata + * byte to extend the metadata to an integer number of 4-byte words. + * The receiver MUST round up the length field to the nearest 4-byte + * word boundary, to locate and process the next field in the packet. + * The receiver MUST access only those bytes in the metadata indicated + * by the length field (i.e., actual number of bytes) and MUST ignore + * the remaining bytes up to the nearest 4-byte word boundary. The + * Length may be 0 or greater. + * + * A value of 0 denotes a Context Header without a Variable Metadata + * field. + * + * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/ + */ + +/** + * struct nsh_md1_ctx - Keeps track of NSH context data + * @nshc<1-4>: NSH Contexts. + */ +struct nsh_md1_ctx { + __be32 context[4]; +}; + +struct nsh_md2_tlv { + __be16 md_class; + u8 type; + u8 length; + u8 md_value[]; +}; + +struct nshhdr { + __be16 ver_flags_ttl_len; + u8 mdtype; + u8 np; + __be32 path_hdr; + union { + struct nsh_md1_ctx md1; + struct nsh_md2_tlv md2; + }; +}; + +/* Masking NSH header fields. */ +#define NSH_VER_MASK 0xc000 +#define NSH_VER_SHIFT 14 +#define NSH_FLAGS_MASK 0x3000 +#define NSH_FLAGS_SHIFT 12 +#define NSH_TTL_MASK 0x0fc0 +#define NSH_TTL_SHIFT 6 +#define NSH_LEN_MASK 0x003f +#define NSH_LEN_SHIFT 0 + +#define NSH_MDTYPE_MASK 0x0f +#define NSH_MDTYPE_SHIFT 0 + +#define NSH_SPI_MASK 0xffffff00 +#define NSH_SPI_SHIFT 8 +#define NSH_SI_MASK 0x000000ff +#define NSH_SI_SHIFT 0 + +/* MD Type Registry. */ +#define NSH_M_TYPE1 0x01 +#define NSH_M_TYPE2 0x02 +#define NSH_M_EXP1 0xFE +#define NSH_M_EXP2 0xFF + +/* NSH Base Header Length */ +#define NSH_BASE_HDR_LEN 8 + +/* NSH MD Type 1 header Length. */ +#define NSH_M_TYPE1_LEN 24 + +/* NSH header maximum Length. */ +#define NSH_HDR_MAX_LEN 256 + +/* NSH context headers maximum Length. */ +#define NSH_CTX_HDRS_MAX_LEN 248 + +static inline struct nshhdr *nsh_hdr(struct sk_buff *skb) +{ + return (struct nshhdr *)skb_network_header(skb); +} + +static inline u16 nsh_hdr_len(const struct nshhdr *nsh) +{ + return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK) + >> NSH_LEN_SHIFT) << 2; +} + +static inline u8 nsh_get_ver(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK) + >> NSH_VER_SHIFT; +} + +static inline u8 nsh_get_flags(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK) + >> NSH_FLAGS_SHIFT; +} + +static inline u8 nsh_get_ttl(const struct nshhdr *nsh) +{ + return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK) + >> NSH_TTL_SHIFT; +} + +static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask) +{ + nsh->ver_flags_ttl_len + = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag); +} + +static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl) +{ + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK); +} + +static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags, + u8 ttl, u8 len) +{ + len = len >> 2; + __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) | + ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) | + ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK), + NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); +} + +#endif /* __NET_NSH_H */ -- cgit From 1b70d792cf6775fb5d0737524387893daeb5374a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Aug 2017 13:53:34 -0700 Subject: ipv6: Use rt6i_idev index for echo replies to a local address Tariq repored local pings to linklocal address is failing: $ ifconfig ens8 ens8: flags=4163 mtu 1500 inet 11.141.16.6 netmask 255.255.0.0 broadcast 11.141.255.255 inet6 fe80::7efe:90ff:fecb:7502 prefixlen 64 scopeid 0x20 ether 7c:fe:90:cb:75:02 txqueuelen 1000 (Ethernet) RX packets 12 bytes 1164 (1.1 KiB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 30 bytes 2484 (2.4 KiB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 $ /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8 PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 882bc3c7ccde..ee96f402cb75 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,6 +164,16 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) +{ + const struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt6 = NULL; + + if (dst) + rt6 = container_of(dst, struct rt6_info, dst); + + return rt6; +} /* * Store a destination cache entry in a socket -- cgit From 0dd5759dbb1c9a862e7d90c09d6cf398c45f1100 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 29 Aug 2017 13:17:51 -0700 Subject: net: remove dmaengine.h inclusion from netdevice.h Since the removal of NET_DMA, dmaengine.h header file shouldn't be needed by netdevice.h anymore. Signed-off-by: Dave Jiang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c5475b37a631..29bf06ff157c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -35,7 +35,6 @@ #include #include -#include #include #include -- cgit From eaa72dc47488d599439cd0fd0f8c4f1bcb3906bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Aug 2017 15:16:01 -0700 Subject: neigh: increase queue_len_bytes to match wmem_default Florian reported UDP xmit drops that could be root caused to the too small neigh limit. Current limit is 64 KB, meaning that even a single UDP socket would hit it, since its default sk_sndbuf comes from net.core.wmem_default (~212992 bytes on 64bit arches). Once ARP/ND resolution is in progress, we should allow a little more packets to be queued, at least for one producer. Once neigh arp_queue is filled, a rogue socket should hit its sk_sndbuf limit and either block in sendmsg() or return -EAGAIN. Signed-off-by: Eric Dumazet Reported-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 1c2912d433e8..03a362568357 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2368,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap); void sk_get_meminfo(const struct sock *sk, u32 *meminfo); +/* Take into consideration the size of the struct sk_buff overhead in the + * determination of these values, since that is non-constant across + * platforms. This makes socket queueing behavior and performance + * not depend upon such differences. + */ +#define _SK_MEM_PACKETS 256 +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) +#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) +#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) + extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -- cgit From dc2251bf98c66db3f4e055b751968f0871037ae4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Aug 2017 23:19:57 +0200 Subject: cpuidle: Eliminate the CPUIDLE_DRIVER_STATE_START symbol On some architectures the first (index 0) idle state is a polling one and it doesn't really save energy, so there is the CPUIDLE_DRIVER_STATE_START symbol allowing some pieces of cpuidle code to avoid using that state. However, this makes the code rather hard to follow. It is better to explicitly avoid the polling state, so add a new cpuidle state flag CPUIDLE_FLAG_POLLING to mark it and make the relevant code check that flag for the first state instead of using the CPUIDLE_DRIVER_STATE_START symbol. In the ACPI processor driver that cannot always rely on the state flags (like before the states table has been set up) define a new internal symbol ACPI_IDLE_STATE_START equivalent to the CPUIDLE_DRIVER_STATE_START one and drop the latter. Signed-off-by: Rafael J. Wysocki Tested-by: Sudeep Holla Acked-by: Daniel Lezcano --- include/linux/cpuidle.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index fc1e5d7fc1c7..b58d952a00e2 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -63,6 +63,7 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_NONE (0x00) +#define CPUIDLE_FLAG_POLLING (0x01) /* polling state */ #define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ #define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ @@ -250,12 +251,6 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} #endif -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -#define CPUIDLE_DRIVER_STATE_START 1 -#else -#define CPUIDLE_DRIVER_STATE_START 0 -#endif - #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ ({ \ int __ret; \ -- cgit From 34c2f65b718d44ea7d7b3cc10777f410677455ce Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Aug 2017 23:21:07 +0200 Subject: cpuidle: Move polling state initialization code to separate file Move the polling state initialization code to a separate file built conditionally on CONFIG_ARCH_HAS_CPU_RELAX to get rid of the #ifdef in driver.c. Signed-off-by: Rafael J. Wysocki Tested-by: Sudeep Holla Acked-by: Daniel Lezcano --- include/linux/cpuidle.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b58d952a00e2..561bc5365067 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -225,6 +225,12 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, } #endif +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +void poll_idle_init(struct cpuidle_driver *drv); +#else +static void poll_idle_init(struct cpuidle_driver *drv) {} +#endif + /****************************** * CPUIDLE GOVERNOR INTERFACE * ******************************/ -- cgit From 1b39e3f813b4685c7a30ae964d5529a1b0e3a286 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Aug 2017 03:14:37 +0200 Subject: cpuidle: Make drivers initialize polling state Make the drivers that want to include the polling state into their states table initialize it explicitly and drop the initialization of it (which in fact is conditional, but that is not obvious from the code) from the core. Signed-off-by: Rafael J. Wysocki Tested-by: Sudeep Holla Acked-by: Daniel Lezcano --- include/linux/cpuidle.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 561bc5365067..5baacd3a0559 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -226,9 +226,9 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, #endif #ifdef CONFIG_ARCH_HAS_CPU_RELAX -void poll_idle_init(struct cpuidle_driver *drv); +void cpuidle_poll_state_init(struct cpuidle_driver *drv); #else -static void poll_idle_init(struct cpuidle_driver *drv) {} +static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {} #endif /****************************** -- cgit From e931d0dab4393f7195f84f42ed6fd973c26f62f1 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 29 Aug 2017 14:20:20 +0100 Subject: ACPI / APEI: Suppress message if HEST not present According to the ACPI specification, firmware is not required to provide the Hardware Error Source Table (HEST). When HEST is not present, the following superfluous message is printed to the kernel boot log - [ 3.460067] GHES: HEST is not enabled! Extend hest_disable variable to track whether the firmware provides this table and if it is not present skip any log output. The existing behaviour is preserved in all other cases. Suggested-by: Borislav Petkov Signed-off-by: Punit Agrawal Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- include/acpi/apei.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 76284bb560a6..c46694abea28 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -16,7 +16,13 @@ #ifdef __KERNEL__ -extern bool hest_disable; +enum hest_status { + HEST_ENABLED, + HEST_DISABLED, + HEST_NOT_FOUND, +}; + +extern int hest_disable; extern int erst_disable; #ifdef CONFIG_ACPI_APEI_GHES extern bool ghes_disable; -- cgit From 26e3e3cb05604073f2e9969fb0f06f35eb3b3313 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 29 Aug 2017 08:50:11 -0700 Subject: scsi: rcu: Introduce rcu_swap_protected() A common pattern in RCU code is to assign a new value to an RCU pointer after having read and stored the old value. Introduce a macro for this pattern. Signed-off-by: Bart Van Assche Acked-by: Paul E. McKenney Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Shane M Seymour Signed-off-by: Martin K. Petersen --- include/linux/rcupdate.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f816fc72b51e..8e920f0ecb07 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -407,6 +407,22 @@ static inline void rcu_preempt_sleep_check(void) { } _r_a_p__v; \ }) +/** + * rcu_swap_protected() - swap an RCU and a regular pointer + * @rcu_ptr: RCU pointer + * @ptr: regular pointer + * @c: the conditions under which the dereference will take place + * + * Perform swap(@rcu_ptr, @ptr) where @rcu_ptr is an RCU-annotated pointer and + * @c is the argument that is passed to the rcu_dereference_protected() call + * used to read that pointer. + */ +#define rcu_swap_protected(rcu_ptr, ptr, c) do { \ + typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + (ptr) = __tmp; \ +} while (0) + /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read -- cgit From ccf1e0045eea8f98d60fc9327bcb14c958d2e4c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 29 Aug 2017 08:50:13 -0700 Subject: scsi: Rework handling of scsi_device.vpd_pg8[03] Introduce struct scsi_vpd for the VPD page length, data and the RCU head that will be used to free the VPD data. Use kfree_rcu() instead of kfree() to free VPD data. Move the VPD buffer pointer check inside the RCU read lock in the sysfs code. Only annotate pointers that are shared across threads with __rcu. Use rcu_dereference() when dereferencing an RCU pointer. This patch suppresses about twenty sparse complaints about the vpd_pg8[03] pointers. This patch also fixes a race condition, namely that updating of the VPD pointers and length variables in struct scsi_device was not atomic with reference to the code reading these variables. See also "Does the update code tolerate concurrent accesses?" in Documentation/RCU/checklist.txt. Fixes: commit 09e2b0b14690 ("scsi: rescan VPD attributes") Signed-off-by: Bart Van Assche Acked-by: Hannes Reinecke Reviewed-by: Shane Seymour Cc: Christoph Hellwig Cc: Johannes Thumshirn Cc: Shane Seymour Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index f054f3f43c75..82e93ee94708 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -80,6 +80,18 @@ struct scsi_event { */ }; +/** + * struct scsi_vpd - SCSI Vital Product Data + * @rcu: For kfree_rcu(). + * @len: Length in bytes of @data. + * @data: VPD data as defined in various T10 SCSI standard documents. + */ +struct scsi_vpd { + struct rcu_head rcu; + int len; + unsigned char data[]; +}; + struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; @@ -122,10 +134,8 @@ struct scsi_device { const char * rev; /* ... "nullnullnullnull" before scan */ #define SCSI_VPD_PG_LEN 255 - int vpd_pg83_len; - unsigned char __rcu *vpd_pg83; - int vpd_pg80_len; - unsigned char __rcu *vpd_pg80; + struct scsi_vpd __rcu *vpd_pg83; + struct scsi_vpd __rcu *vpd_pg80; unsigned char current_tag; /* current tag */ struct scsi_target *sdev_target; /* used only for single_lun */ -- cgit From c1225f01af085aea9c9d094febf157de9d07d861 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 17:37:38 +0200 Subject: scsi: bsg-lib: pass the release callback through bsg_setup_queue The SAS code will need it. Also mark the name argument const to match bsg_register_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/linux/bsg-lib.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..1062f08e1a55 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -66,8 +66,9 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); -struct request_queue *bsg_setup_queue(struct device *dev, char *name, - bsg_job_fn *job_fn, int dd_job_size); +struct request_queue *bsg_setup_queue(struct device *dev, const char *name, + bsg_job_fn *job_fn, int dd_job_size, + void (*release)(struct device *)); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit From 651a013649943710a900551ec6e03d2084e1a65a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 17:37:41 +0200 Subject: scsi: scsi_transport_sas: switch to bsg-lib for SMP passthrough Simplify the SMP passthrough code by switching it to the generic bsg-lib helpers that abstract away the details of the request code, and gets drivers out of seeing struct scsi_request. For the libsas host SMP code there is a small behavior difference in that we now always clear the residual len for successful commands, similar to the three other SMP handler implementations. Given that there is no partial command handling in the host SMP handler this should not matter in practice. [mkp: typos and checkpatch fixes] Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/libsas.h | 3 --- include/scsi/scsi_transport_sas.h | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index e7c012ce5ecd..6c0dc6155ee7 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -721,9 +721,6 @@ extern int sas_slave_alloc(struct scsi_device *); extern int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg); extern int sas_drain_work(struct sas_ha_struct *ha); -extern int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, - struct request *req); - extern void sas_ssp_task_response(struct device *dev, struct sas_task *task, struct ssp_response_iu *iu); struct sas_phy *sas_get_local_phy(struct domain_device *dev); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 73d870918939..a23304b7fb2e 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -5,6 +5,7 @@ #include #include #include +#include struct scsi_transport_template; struct sas_rphy; @@ -176,7 +177,8 @@ struct sas_function_template { int (*phy_setup)(struct sas_phy *); void (*phy_release)(struct sas_phy *); int (*set_phy_speed)(struct sas_phy *, struct sas_phy_linkrates *); - int (*smp_handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); + void (*smp_handler)(struct bsg_job *, struct Scsi_Host *, + struct sas_rphy *); }; -- cgit From caf989c350e8e0b9584744b9005fc2c45ca30883 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 24 Aug 2017 12:51:30 +0530 Subject: rpmsg: glink: Introduce glink smem based transport The glink protocol supports different types of transports (shared memory). With the core protocol remaining the same, the way the transport's memory is probed and accessed is different. So add support for glink's smem based transports. Adding a new smem transport register function and the fifo accessors for the same. Acked-by: Arun Kumar Neelakantam Signed-off-by: Sricharan R Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/qcom_glink.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/rpmsg/qcom_glink.h (limited to 'include') diff --git a/include/linux/rpmsg/qcom_glink.h b/include/linux/rpmsg/qcom_glink.h new file mode 100644 index 000000000000..a622f029836e --- /dev/null +++ b/include/linux/rpmsg/qcom_glink.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_RPMSG_QCOM_GLINK_H +#define _LINUX_RPMSG_QCOM_GLINK_H + +#include + +struct qcom_glink; + +#if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK_SMEM) + +struct qcom_glink *qcom_glink_smem_register(struct device *parent, + struct device_node *node); +void qcom_glink_smem_unregister(struct qcom_glink *glink); + +#else + +static inline struct qcom_glink * +qcom_glink_smem_register(struct device *parent, + struct device_node *node) +{ + return NULL; +} + +static inline void qcom_glink_smem_unregister(struct qcom_glink *glink) {} + +#endif + +#endif -- cgit From 603aa14d3daaa7073bab4c472025c4963030e0cc Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 21 Jun 2017 16:00:27 +0200 Subject: mmc: tmio, renesas-sdhi: add max_{segs, blk_count} to tmio_mmc_data Allow TMIO and SDHI driver implementations to provide values for max_segs and max_blk_count. A follow-up patch will set these values for Renesas Gen3 SoCs the using an SDHI driver. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Ai Kyuse Signed-off-by: Simon Horman Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 26e8f8c0a6db..18b17a39d465 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -128,6 +128,8 @@ struct tmio_mmc_data { unsigned int cd_gpio; int alignment_shift; dma_addr_t dma_rx_offset; + unsigned int max_blk_count; + unsigned short max_segs; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); }; -- cgit From 1896f14006b28a7ed6881666d18a244827af0a5b Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 19 Jul 2017 15:50:56 +0800 Subject: mmc: core: remove check of host->removed for rescan routine The intention of this check was to prevent the conflict between hotplug and removing driver for whatever reason. Currently it doesn't improve anything and the following rescan process could still saftly perform the scan flow. So these code seems pointless now and let's remove them. Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ebd1cebbef0c..4617c21f97f7 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -328,9 +328,6 @@ struct mmc_host { unsigned int use_spi_crc:1; unsigned int claimed:1; /* host exclusively claimed */ unsigned int bus_dead:1; /* bus has been released */ -#ifdef CONFIG_MMC_DEBUG - unsigned int removed:1; /* host is being removed */ -#endif unsigned int can_retune:1; /* re-tuning can be used */ unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ -- cgit From 4406ae215b5a1dd59d941c1323b9f40d241357ac Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 2 Aug 2017 11:12:42 +0800 Subject: mmc: core: correct taac parameter according to the specification Per the spec of JESD84-B51, section 7.3, replace tacc with taac to fix the obvious typo. Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 46c73e97e61f..279b39008a33 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -29,8 +29,8 @@ struct mmc_csd { unsigned char structure; unsigned char mmca_vsn; unsigned short cmdclass; - unsigned short tacc_clks; - unsigned int tacc_ns; + unsigned short taac_clks; + unsigned int taac_ns; unsigned int c_size; unsigned int r2w_factor; unsigned int max_dtr; -- cgit From f6f64ed868d32a121f1535da9f42791c91562e4a Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 24 Jul 2017 21:58:56 +0800 Subject: clk: sunxi-ng: Add interface to query or configure MMC timing modes. Starting with the A83T SoC, Allwinner introduced a new timing mode for its MMC clocks. The new mode changes how the MMC controller sample and output clocks are delayed to match chip and board specifics. There are two controls for this, one on the CCU side controlling how the clocks behave, and one in the MMC controller controlling what inputs to take and how to route them. In the old mode, the MMC clock had 2 child clocks providing the output and sample clocks, which could be delayed by a number of clock cycles measured from the MMC clock's parent. With the new mode, the 2 delay clocks are no longer active. Instead, the delays and associated controls are moved into the MMC controller. The output of the MMC clock is also halved. The difference in how things are wired between the modes means that the clock controls and the MMC controls must match. To achieve this in a clear, explicit way, we introduce two functions for the MMC driver to use: one queries the hardware for the current mode set, and the other allows the MMC driver to request a mode. Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Ulf Hansson --- include/linux/clk/sunxi-ng.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 include/linux/clk/sunxi-ng.h (limited to 'include') diff --git a/include/linux/clk/sunxi-ng.h b/include/linux/clk/sunxi-ng.h new file mode 100644 index 000000000000..990f760f70e5 --- /dev/null +++ b/include/linux/clk/sunxi-ng.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017 Chen-Yu Tsai. All rights reserved. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_CLK_SUNXI_NG_H_ +#define _LINUX_CLK_SUNXI_NG_H_ + +#include + +#ifdef CONFIG_SUNXI_CCU +int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, bool new_mode); +int sunxi_ccu_get_mmc_timing_mode(struct clk *clk); +#else +static inline int sunxi_ccu_set_mmc_timing_mode(struct clk *clk, + bool new_mode) +{ + return -ENOTSUPP; +} + +static inline int sunxi_ccu_get_mmc_timing_mode(struct clk *clk) +{ + return -ENOTSUPP; +} +#endif + +#endif -- cgit From 5124b59202eb3118eba5ac2222dc00f3390549a8 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 9 Aug 2017 21:00:41 +0200 Subject: mmc: renesas_sdhi: use extra flag for CBSY usage There is one SDHI instance on Gen2 which does not have the CBSY bit. So, turn CBSY usage into an extra flag and set it accordingly. This has the additional advantage that we can also set it for other incarnations later. Signed-off-by: Wolfram Sang Tested-by: Chris Brandt Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 18b17a39d465..b572955e6de6 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -107,6 +107,9 @@ */ #define TMIO_MMC_CLK_ACTUAL BIT(10) +/* Some controllers have a CBSY bit */ +#define TMIO_MMC_HAVE_CBSY BIT(11) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit From dc8d68bb6c6f7d7fb1aeb3450c0654598e4f4e52 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 10 Aug 2017 15:08:11 +0300 Subject: mmc: core: Remove unused MMC_CAP2_PACKED_CMD Packed commands support was removed but some bits got left behind. Remove them. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4617c21f97f7..e92629518f68 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -291,10 +291,6 @@ struct mmc_host { MMC_CAP2_HS200_1_2V_SDR) #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ -#define MMC_CAP2_PACKED_RD (1 << 12) /* Allow packed read */ -#define MMC_CAP2_PACKED_WR (1 << 13) /* Allow packed write */ -#define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \ - MMC_CAP2_PACKED_WR) #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ #define MMC_CAP2_HS400_1_8V (1 << 15) /* Can support HS400 1.8V */ #define MMC_CAP2_HS400_1_2V (1 << 16) /* Can support HS400 1.2V */ -- cgit From d2f82254e4e862662e7820953b049b7d4d660ec7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 10 Aug 2017 15:08:07 +0300 Subject: mmc: core: Add members to mmc_request and mmc_data for CQE's Most of the information needed to issue requests to a CQE is already in struct mmc_request and struct mmc_data. Add data block address, some flags, and the task id (tag), and allow for cmd being NULL which it is for CQE tasks. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 13 +++++++++++-- include/trace/events/mmc.h | 36 +++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index a0c63ea28796..bf1788a224e6 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -122,11 +122,18 @@ struct mmc_data { unsigned int timeout_clks; /* data timeout (in clocks) */ unsigned int blksz; /* data block size */ unsigned int blocks; /* number of blocks */ + unsigned int blk_addr; /* block address */ int error; /* data error */ unsigned int flags; -#define MMC_DATA_WRITE (1 << 8) -#define MMC_DATA_READ (1 << 9) +#define MMC_DATA_WRITE BIT(8) +#define MMC_DATA_READ BIT(9) +/* Extra flags used by CQE */ +#define MMC_DATA_QBR BIT(10) /* CQE queue barrier*/ +#define MMC_DATA_PRIO BIT(11) /* CQE high priority */ +#define MMC_DATA_REL_WR BIT(12) /* Reliable write */ +#define MMC_DATA_DAT_TAG BIT(13) /* Tag request */ +#define MMC_DATA_FORCED_PRG BIT(14) /* Forced programming */ unsigned int bytes_xfered; @@ -153,6 +160,8 @@ struct mmc_request { /* Allow other commands during this ongoing data transfer or busy wait */ bool cap_cmd_during_tfr; + + int tag; }; struct mmc_card; diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h index a72f9b94c80b..f30a99ac65b6 100644 --- a/include/trace/events/mmc.h +++ b/include/trace/events/mmc.h @@ -29,8 +29,10 @@ TRACE_EVENT(mmc_request_start, __field(unsigned int, sbc_flags) __field(unsigned int, sbc_retries) __field(unsigned int, blocks) + __field(unsigned int, blk_addr) __field(unsigned int, blksz) __field(unsigned int, data_flags) + __field(int, tag) __field(unsigned int, can_retune) __field(unsigned int, doing_retune) __field(unsigned int, retune_now) @@ -42,10 +44,10 @@ TRACE_EVENT(mmc_request_start, ), TP_fast_assign( - __entry->cmd_opcode = mrq->cmd->opcode; - __entry->cmd_arg = mrq->cmd->arg; - __entry->cmd_flags = mrq->cmd->flags; - __entry->cmd_retries = mrq->cmd->retries; + __entry->cmd_opcode = mrq->cmd ? mrq->cmd->opcode : 0; + __entry->cmd_arg = mrq->cmd ? mrq->cmd->arg : 0; + __entry->cmd_flags = mrq->cmd ? mrq->cmd->flags : 0; + __entry->cmd_retries = mrq->cmd ? mrq->cmd->retries : 0; __entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0; __entry->stop_arg = mrq->stop ? mrq->stop->arg : 0; __entry->stop_flags = mrq->stop ? mrq->stop->flags : 0; @@ -56,7 +58,9 @@ TRACE_EVENT(mmc_request_start, __entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0; __entry->blksz = mrq->data ? mrq->data->blksz : 0; __entry->blocks = mrq->data ? mrq->data->blocks : 0; + __entry->blk_addr = mrq->data ? mrq->data->blk_addr : 0; __entry->data_flags = mrq->data ? mrq->data->flags : 0; + __entry->tag = mrq->tag; __entry->can_retune = host->can_retune; __entry->doing_retune = host->doing_retune; __entry->retune_now = host->retune_now; @@ -71,8 +75,8 @@ TRACE_EVENT(mmc_request_start, "cmd_opcode=%u cmd_arg=0x%x cmd_flags=0x%x cmd_retries=%u " "stop_opcode=%u stop_arg=0x%x stop_flags=0x%x stop_retries=%u " "sbc_opcode=%u sbc_arg=0x%x sbc_flags=0x%x sbc_retires=%u " - "blocks=%u block_size=%u data_flags=0x%x " - "can_retune=%u doing_retune=%u retune_now=%u " + "blocks=%u block_size=%u blk_addr=%u data_flags=0x%x " + "tag=%d can_retune=%u doing_retune=%u retune_now=%u " "need_retune=%d hold_retune=%d retune_period=%u", __get_str(name), __entry->mrq, __entry->cmd_opcode, __entry->cmd_arg, @@ -81,7 +85,8 @@ TRACE_EVENT(mmc_request_start, __entry->stop_flags, __entry->stop_retries, __entry->sbc_opcode, __entry->sbc_arg, __entry->sbc_flags, __entry->sbc_retries, - __entry->blocks, __entry->blksz, __entry->data_flags, + __entry->blocks, __entry->blk_addr, + __entry->blksz, __entry->data_flags, __entry->tag, __entry->can_retune, __entry->doing_retune, __entry->retune_now, __entry->need_retune, __entry->hold_retune, __entry->retune_period) @@ -108,6 +113,7 @@ TRACE_EVENT(mmc_request_done, __field(unsigned int, sbc_retries) __field(unsigned int, bytes_xfered) __field(int, data_err) + __field(int, tag) __field(unsigned int, can_retune) __field(unsigned int, doing_retune) __field(unsigned int, retune_now) @@ -119,10 +125,13 @@ TRACE_EVENT(mmc_request_done, ), TP_fast_assign( - __entry->cmd_opcode = mrq->cmd->opcode; - __entry->cmd_err = mrq->cmd->error; - memcpy(__entry->cmd_resp, mrq->cmd->resp, 4); - __entry->cmd_retries = mrq->cmd->retries; + __entry->cmd_opcode = mrq->cmd ? mrq->cmd->opcode : 0; + __entry->cmd_err = mrq->cmd ? mrq->cmd->error : 0; + __entry->cmd_resp[0] = mrq->cmd ? mrq->cmd->resp[0] : 0; + __entry->cmd_resp[1] = mrq->cmd ? mrq->cmd->resp[1] : 0; + __entry->cmd_resp[2] = mrq->cmd ? mrq->cmd->resp[2] : 0; + __entry->cmd_resp[3] = mrq->cmd ? mrq->cmd->resp[3] : 0; + __entry->cmd_retries = mrq->cmd ? mrq->cmd->retries : 0; __entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0; __entry->stop_err = mrq->stop ? mrq->stop->error : 0; __entry->stop_resp[0] = mrq->stop ? mrq->stop->resp[0] : 0; @@ -139,6 +148,7 @@ TRACE_EVENT(mmc_request_done, __entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0; __entry->bytes_xfered = mrq->data ? mrq->data->bytes_xfered : 0; __entry->data_err = mrq->data ? mrq->data->error : 0; + __entry->tag = mrq->tag; __entry->can_retune = host->can_retune; __entry->doing_retune = host->doing_retune; __entry->retune_now = host->retune_now; @@ -154,7 +164,7 @@ TRACE_EVENT(mmc_request_done, "cmd_retries=%u stop_opcode=%u stop_err=%d " "stop_resp=0x%x 0x%x 0x%x 0x%x stop_retries=%u " "sbc_opcode=%u sbc_err=%d sbc_resp=0x%x 0x%x 0x%x 0x%x " - "sbc_retries=%u bytes_xfered=%u data_err=%d " + "sbc_retries=%u bytes_xfered=%u data_err=%d tag=%d " "can_retune=%u doing_retune=%u retune_now=%u need_retune=%d " "hold_retune=%d retune_period=%u", __get_str(name), __entry->mrq, @@ -170,7 +180,7 @@ TRACE_EVENT(mmc_request_done, __entry->sbc_resp[0], __entry->sbc_resp[1], __entry->sbc_resp[2], __entry->sbc_resp[3], __entry->sbc_retries, - __entry->bytes_xfered, __entry->data_err, + __entry->bytes_xfered, __entry->data_err, __entry->tag, __entry->can_retune, __entry->doing_retune, __entry->retune_now, __entry->need_retune, __entry->hold_retune, __entry->retune_period) -- cgit From c0020756315eebec58310aca42cf9fb73e1322eb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 19 Jul 2017 21:28:52 +0300 Subject: efi: switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Signed-off-by: Andy Shevchenko Acked-by: Ard Biesheuvel Signed-off-by: Christoph Hellwig --- include/linux/cper.h | 94 ++++++++++++++++++++++++++-------------------------- include/linux/efi.h | 4 +-- 2 files changed, 49 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/cper.h b/include/linux/cper.h index 4c671fc2081e..723e952fde0d 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -74,36 +74,36 @@ enum { * Corrected Machine Check */ #define CPER_NOTIFY_CMC \ - UUID_LE(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \ - 0xEB, 0xD4, 0xF8, 0x90) + GUID_INIT(0x2DCE8BB1, 0xBDD7, 0x450e, 0xB9, 0xAD, 0x9C, 0xF4, \ + 0xEB, 0xD4, 0xF8, 0x90) /* Corrected Platform Error */ #define CPER_NOTIFY_CPE \ - UUID_LE(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81, \ - 0xF2, 0x7E, 0xBE, 0xEE) + GUID_INIT(0x4E292F96, 0xD843, 0x4a55, 0xA8, 0xC2, 0xD4, 0x81, \ + 0xF2, 0x7E, 0xBE, 0xEE) /* Machine Check Exception */ #define CPER_NOTIFY_MCE \ - UUID_LE(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \ - 0xE1, 0x49, 0x13, 0xBB) + GUID_INIT(0xE8F56FFE, 0x919C, 0x4cc5, 0xBA, 0x88, 0x65, 0xAB, \ + 0xE1, 0x49, 0x13, 0xBB) /* PCI Express Error */ #define CPER_NOTIFY_PCIE \ - UUID_LE(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D, \ - 0xAF, 0x67, 0xC1, 0x04) + GUID_INIT(0xCF93C01F, 0x1A16, 0x4dfc, 0xB8, 0xBC, 0x9C, 0x4D, \ + 0xAF, 0x67, 0xC1, 0x04) /* INIT Record (for IPF) */ #define CPER_NOTIFY_INIT \ - UUID_LE(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B, \ - 0xD3, 0x9B, 0xC9, 0x8E) + GUID_INIT(0xCC5263E8, 0x9308, 0x454a, 0x89, 0xD0, 0x34, 0x0B, \ + 0xD3, 0x9B, 0xC9, 0x8E) /* Non-Maskable Interrupt */ #define CPER_NOTIFY_NMI \ - UUID_LE(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24, \ - 0x85, 0xD6, 0xE9, 0x8A) + GUID_INIT(0x5BAD89FF, 0xB7E6, 0x42c9, 0x81, 0x4A, 0xCF, 0x24, \ + 0x85, 0xD6, 0xE9, 0x8A) /* BOOT Error Record */ #define CPER_NOTIFY_BOOT \ - UUID_LE(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \ - 0xD4, 0x64, 0xB3, 0x8F) + GUID_INIT(0x3D61A466, 0xAB40, 0x409a, 0xA6, 0x98, 0xF3, 0x62, \ + 0xD4, 0x64, 0xB3, 0x8F) /* DMA Remapping Error */ #define CPER_NOTIFY_DMAR \ - UUID_LE(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ - 0x72, 0x2D, 0xEB, 0x41) + GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ + 0x72, 0x2D, 0xEB, 0x41) /* * Flags bits definitions for flags in struct cper_record_header @@ -170,50 +170,50 @@ enum { * Processor Generic */ #define CPER_SEC_PROC_GENERIC \ - UUID_LE(0x9876CCAD, 0x47B4, 0x4bdb, 0xB6, 0x5E, 0x16, 0xF1, \ - 0x93, 0xC4, 0xF3, 0xDB) + GUID_INIT(0x9876CCAD, 0x47B4, 0x4bdb, 0xB6, 0x5E, 0x16, 0xF1, \ + 0x93, 0xC4, 0xF3, 0xDB) /* Processor Specific: X86/X86_64 */ #define CPER_SEC_PROC_IA \ - UUID_LE(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \ - 0x24, 0x2B, 0x6E, 0x1D) + GUID_INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \ + 0x24, 0x2B, 0x6E, 0x1D) /* Processor Specific: IA64 */ #define CPER_SEC_PROC_IPF \ - UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ - 0x80, 0xC7, 0x3C, 0x88, 0x81) + GUID_INIT(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ + 0x80, 0xC7, 0x3C, 0x88, 0x81) /* Processor Specific: ARM */ #define CPER_SEC_PROC_ARM \ - UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \ - 0x1D, 0x5D, 0x46, 0xB0) + GUID_INIT(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \ + 0x1D, 0x5D, 0x46, 0xB0) /* Platform Memory */ #define CPER_SEC_PLATFORM_MEM \ - UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ - 0xED, 0x7C, 0x83, 0xB1) + GUID_INIT(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ + 0xED, 0x7C, 0x83, 0xB1) #define CPER_SEC_PCIE \ - UUID_LE(0xD995E954, 0xBBC1, 0x430F, 0xAD, 0x91, 0xB4, 0x4D, \ - 0xCB, 0x3C, 0x6F, 0x35) + GUID_INIT(0xD995E954, 0xBBC1, 0x430F, 0xAD, 0x91, 0xB4, 0x4D, \ + 0xCB, 0x3C, 0x6F, 0x35) /* Firmware Error Record Reference */ #define CPER_SEC_FW_ERR_REC_REF \ - UUID_LE(0x81212A96, 0x09ED, 0x4996, 0x94, 0x71, 0x8D, 0x72, \ - 0x9C, 0x8E, 0x69, 0xED) + GUID_INIT(0x81212A96, 0x09ED, 0x4996, 0x94, 0x71, 0x8D, 0x72, \ + 0x9C, 0x8E, 0x69, 0xED) /* PCI/PCI-X Bus */ #define CPER_SEC_PCI_X_BUS \ - UUID_LE(0xC5753963, 0x3B84, 0x4095, 0xBF, 0x78, 0xED, 0xDA, \ - 0xD3, 0xF9, 0xC9, 0xDD) + GUID_INIT(0xC5753963, 0x3B84, 0x4095, 0xBF, 0x78, 0xED, 0xDA, \ + 0xD3, 0xF9, 0xC9, 0xDD) /* PCI Component/Device */ #define CPER_SEC_PCI_DEV \ - UUID_LE(0xEB5E4685, 0xCA66, 0x4769, 0xB6, 0xA2, 0x26, 0x06, \ - 0x8B, 0x00, 0x13, 0x26) + GUID_INIT(0xEB5E4685, 0xCA66, 0x4769, 0xB6, 0xA2, 0x26, 0x06, \ + 0x8B, 0x00, 0x13, 0x26) #define CPER_SEC_DMAR_GENERIC \ - UUID_LE(0x5B51FEF7, 0xC79D, 0x4434, 0x8F, 0x1B, 0xAA, 0x62, \ - 0xDE, 0x3E, 0x2C, 0x64) + GUID_INIT(0x5B51FEF7, 0xC79D, 0x4434, 0x8F, 0x1B, 0xAA, 0x62, \ + 0xDE, 0x3E, 0x2C, 0x64) /* Intel VT for Directed I/O specific DMAr */ #define CPER_SEC_DMAR_VT \ - UUID_LE(0x71761D37, 0x32B2, 0x45cd, 0xA7, 0xD0, 0xB0, 0xFE, \ - 0xDD, 0x93, 0xE8, 0xCF) + GUID_INIT(0x71761D37, 0x32B2, 0x45cd, 0xA7, 0xD0, 0xB0, 0xFE, \ + 0xDD, 0x93, 0xE8, 0xCF) /* IOMMU specific DMAr */ #define CPER_SEC_DMAR_IOMMU \ - UUID_LE(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F, \ - 0xDF, 0xAA, 0x84, 0xEC) + GUID_INIT(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F, \ + 0xDF, 0xAA, 0x84, 0xEC) #define CPER_PROC_VALID_TYPE 0x0001 #define CPER_PROC_VALID_ISA 0x0002 @@ -290,10 +290,10 @@ struct cper_record_header { __u32 validation_bits; __u32 record_length; __u64 timestamp; - uuid_le platform_id; - uuid_le partition_id; - uuid_le creator_id; - uuid_le notification_type; + guid_t platform_id; + guid_t partition_id; + guid_t creator_id; + guid_t notification_type; __u64 record_id; __u32 flags; __u64 persistence_information; @@ -309,8 +309,8 @@ struct cper_section_descriptor { __u8 validation_bits; __u8 reserved; /* must be zero */ __u32 flags; - uuid_le section_type; - uuid_le fru_id; + guid_t section_type; + guid_t fru_id; __u32 section_severity; __u8 fru_text[20]; }; @@ -343,7 +343,7 @@ struct cper_sec_proc_ia { /* IA32/X64 Processor Error Information Structure */ struct cper_ia_err_info { - uuid_le err_type; + guid_t err_type; __u64 validation_bits; __u64 check_info; __u64 target_id; diff --git a/include/linux/efi.h b/include/linux/efi.h index 8269bcb8ccf7..7a322aed979f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -47,10 +47,10 @@ typedef u16 efi_char16_t; /* UNICODE character */ typedef u64 efi_physical_addr_t; typedef void *efi_handle_t; -typedef uuid_le efi_guid_t; +typedef guid_t efi_guid_t; #define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ - UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) + GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) /* * Generic EFI table header -- cgit From 223694b9ae8bfba99f3528d49d07a740af6ff95a Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Thu, 31 Aug 2017 11:22:49 +0800 Subject: nvme: fix the definition of the doorbell buffer config support bit NVMe 1.3 specification defines the Optional Admin Command Support feature flags, bit 8 set to '1' then the controller supports the Doorbell Buffer Config command. Bit 7 is used for Virtualization Mangement command. Signed-off-by: Changpeng Liu Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Fixes: f9f38e33 ("nvme: improve performance for virtual NVMe devices") Cc: stable@vger.kernel.org --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 25d8225dbd04..8efff888bd9b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -254,7 +254,7 @@ enum { NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, - NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, }; struct nvme_lbaf { -- cgit From d3bf68ae04c7e29ed3c30b7f4b1f0c6a4a11c7f1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Aug 2017 15:43:46 +0300 Subject: mmc: host: Add CQE interface Add CQE host operations, capabilities, and host members. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 6 ++++++ include/linux/mmc/host.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index bf1788a224e6..c6d17f626234 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -156,6 +156,12 @@ struct mmc_request { struct completion completion; struct completion cmd_completion; void (*done)(struct mmc_request *);/* completion function */ + /* + * Notify uppers layers (e.g. mmc block driver) that recovery is needed + * due to an error associated with the mmc_request. Currently used only + * by CQE. + */ + void (*recovery_notifier)(struct mmc_request *); struct mmc_host *host; /* Allow other commands during this ongoing data transfer or busy wait */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e92629518f68..f3f2d07feb2a 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -162,6 +162,50 @@ struct mmc_host_ops { unsigned int direction, int blk_size); }; +struct mmc_cqe_ops { + /* Allocate resources, and make the CQE operational */ + int (*cqe_enable)(struct mmc_host *host, struct mmc_card *card); + /* Free resources, and make the CQE non-operational */ + void (*cqe_disable)(struct mmc_host *host); + /* + * Issue a read, write or DCMD request to the CQE. Also deal with the + * effect of ->cqe_off(). + */ + int (*cqe_request)(struct mmc_host *host, struct mmc_request *mrq); + /* Free resources (e.g. DMA mapping) associated with the request */ + void (*cqe_post_req)(struct mmc_host *host, struct mmc_request *mrq); + /* + * Prepare the CQE and host controller to accept non-CQ commands. There + * is no corresponding ->cqe_on(), instead ->cqe_request() is required + * to deal with that. + */ + void (*cqe_off)(struct mmc_host *host); + /* + * Wait for all CQE tasks to complete. Return an error if recovery + * becomes necessary. + */ + int (*cqe_wait_for_idle)(struct mmc_host *host); + /* + * Notify CQE that a request has timed out. Return false if the request + * completed or true if a timeout happened in which case indicate if + * recovery is needed. + */ + bool (*cqe_timeout)(struct mmc_host *host, struct mmc_request *mrq, + bool *recovery_needed); + /* + * Stop all CQE activity and prepare the CQE and host controller to + * accept recovery commands. + */ + void (*cqe_recovery_start)(struct mmc_host *host); + /* + * Clear the queue and call mmc_cqe_request_done() on all requests. + * Requests that errored will have the error set on the mmc_request + * (data->error or cmd->error for DCMD). Requests that did not error + * will have zero data bytes transferred. + */ + void (*cqe_recovery_finish)(struct mmc_host *host); +}; + struct mmc_async_req { /* active mmc request */ struct mmc_request *mrq; @@ -303,6 +347,8 @@ struct mmc_host { #define MMC_CAP2_HS400_ES (1 << 20) /* Host supports enhanced strobe */ #define MMC_CAP2_NO_SD (1 << 21) /* Do not send SD commands during initialization */ #define MMC_CAP2_NO_MMC (1 << 22) /* Do not send (e)MMC commands during initialization */ +#define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */ +#define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */ mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -386,6 +432,13 @@ struct mmc_host { int dsr_req; /* DSR value is valid */ u32 dsr; /* optional driver stage (DSR) value */ + /* Command Queue Engine (CQE) support */ + const struct mmc_cqe_ops *cqe_ops; + void *cqe_private; + int cqe_qdepth; + bool cqe_enabled; + bool cqe_on; + unsigned long private[0] ____cacheline_aligned; }; -- cgit From 906d5ff6188953f4981df39e9999f858542df9ce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Aug 2017 15:43:44 +0300 Subject: mmc: core: Move mmc_start_areq() declaration mmc_start_areq() is an internal mmc core API. Move the declaration accordingly. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index c6d17f626234..927519385482 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -171,11 +171,7 @@ struct mmc_request { }; struct mmc_card; -struct mmc_async_req; -struct mmc_async_req *mmc_start_areq(struct mmc_host *host, - struct mmc_async_req *areq, - enum mmc_blk_status *ret_stat); void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq); int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries); -- cgit From a0aa309c39de58b86b704654434431aeb5a8bdf1 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:55 +0300 Subject: IB/core: Add a generic way to execute an operation on a uobject The ioctl infrastructure treats all user-objects in the same manner. It gets objects ids from the user-space and by using the object type and type attributes mentioned in the object specification, it executes this required method. Passing an object id from the user-space as an attribute is carried out in three stages. The first is carried out before the actual handler and the last is carried out afterwards. The different supported operations are read, write, destroy and create. In the first stage, the former three actions just fetches the object from the repository (by using its id) and locks it. The last action allocates a new uobject. Afterwards, the second stage is carried out when the handler itself carries out the required modification of the object. The last stage is carried out after the handler finishes and commits the result. The former two operations just unlock the object. Destroy calls the "free object" operation, taking into account the object's type and releases the uobject as well. Creation just adds the new uobject to the repository, making the object visible to the application. In order to abstract these details from the ioctl infrastructure layer, we add uverbs_get_uobject_from_context and uverbs_finalize_object functions which corresponds to the first and last stages respectively. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/rdma/uverbs_ioctl.h (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h new file mode 100644 index 000000000000..6885b92db4a8 --- /dev/null +++ b/include/rdma/uverbs_ioctl.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_IOCTL_ +#define _UVERBS_IOCTL_ + +#include + +/* + * ======================================= + * Verbs action specifications + * ======================================= + */ + +enum uverbs_obj_access { + UVERBS_ACCESS_READ, + UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_NEW, + UVERBS_ACCESS_DESTROY +}; + +#endif + -- cgit From f43dbebfa32041826299bdccae0352887fa007ea Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:56 +0300 Subject: IB/core: Add support to finalize objects in one transaction The new ioctl based infrastructure either commits or rollbacks all objects of the method as one transaction. In order to do that, we introduce a notion of dealing with a collection of objects that are related to a specific method. This also requires adding a notion of a method and attribute. A method contains a hash of attributes, where each bucket contains several attributes. The attributes are hashed according to their namespace which resides in the four upper bits of the id. For example, an object could be a CQ, which has an action of CREATE_CQ. This action has multiple attributes. For example, the CQ's new handle and the comp_channel. Each layer in this hierarchy - objects, methods and attributes is split into namespaces. The basic example for that is one namespace representing the default entities and another one representing the driver specific entities. When declaring these methods and attributes, we actually declare their specifications. When a method is executed, we actually allocates some space to hold auxiliary information. This auxiliary information contains meta-data about the required objects, such as pointers to their type information, pointers to the uobjects themselves (if exist), etc. The specification, along with the auxiliary information we allocated and filled is given to the finalize_objects function. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 6885b92db4a8..d3ec02b7d937 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -41,6 +41,12 @@ * ======================================= */ +enum uverbs_attr_type { + UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_IDR, + UVERBS_ATTR_TYPE_FD, +}; + enum uverbs_obj_access { UVERBS_ACCESS_READ, UVERBS_ACCESS_WRITE, @@ -48,5 +54,52 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; +struct uverbs_attr_spec { + enum uverbs_attr_type type; + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; +}; + +struct uverbs_attr_spec_hash { + size_t num_attrs; + struct uverbs_attr_spec attrs[0]; +}; + +struct uverbs_obj_attr { + struct ib_uobject *uobject; +}; + +struct uverbs_attr { + struct uverbs_obj_attr obj_attr; +}; + +struct uverbs_attr_bundle_hash { + /* if bit i is set, it means attrs[i] contains valid information */ + unsigned long *valid_bitmap; + size_t num_attrs; + /* + * arrays of attributes, each element corresponds to the specification + * of the attribute in the same index. + */ + struct uverbs_attr *attrs; +}; + +struct uverbs_attr_bundle { + size_t num_buckets; + struct uverbs_attr_bundle_hash hash[]; +}; + +static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash, + unsigned int idx) +{ + return test_bit(idx, attrs_hash->valid_bitmap); +} + #endif -- cgit From 66b6bef2c4e06f8c7a0030445766bf868110c5a1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 30 Aug 2017 11:48:11 +0200 Subject: power: supply: bq24190_charger: Export 5V boost converter as regulator Register the 5V boost converter as a regulator named "usb_otg_vbus". This commit also adds support for bq24190_platform_data, through which non device-tree platforms can pass the regulator_init_data (containing mappings for the consumer amongst other things). Signed-off-by: Hans de Goede Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel --- include/linux/power/bq24190_charger.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/power/bq24190_charger.h (limited to 'include') diff --git a/include/linux/power/bq24190_charger.h b/include/linux/power/bq24190_charger.h new file mode 100644 index 000000000000..45ce7f116a91 --- /dev/null +++ b/include/linux/power/bq24190_charger.h @@ -0,0 +1,18 @@ +/* + * Platform data for the TI bq24190 battery charger driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _BQ24190_CHARGER_H_ +#define _BQ24190_CHARGER_H_ + +#include + +struct bq24190_platform_data { + const struct regulator_init_data *regulator_init_data; +}; + +#endif -- cgit From add02cfdc9bc2987b0121861d5bb0c7392865be9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Aug 2017 15:50:04 +0200 Subject: iommu: Introduce Interface for IOMMU TLB Flushing With the current IOMMU-API the hardware TLBs have to be flushed in every iommu_ops->unmap() call-back. For unmapping large amounts of address space, like it happens when a KVM domain with assigned devices is destroyed, this causes thousands of unnecessary TLB flushes in the IOMMU hardware because the unmap call-back runs for every unmapped physical page. With the TLB Flush Interface and the new iommu_unmap_fast() function introduced here the need to clean the hardware TLBs is removed from the unmapping code-path. Users of iommu_unmap_fast() have to explicitly call the TLB-Flush functions to sync the page-table changes to the hardware. Three functions for TLB-Flushes are introduced: * iommu_flush_tlb_all() - Flushes all TLB entries associated with that domain. TLBs entries are flushed when this function returns. * iommu_tlb_range_add() - This will add a given range to the flush queue for this domain. * iommu_tlb_sync() - Flushes all queued ranges from the hardware TLBs. Returns when the flush is finished. The semantic of this interface is intentionally similar to the iommu_gather_ops from the io-pgtable code. Cc: Alex Williamson Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f1ce8e517d8d..50be4fd338e4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -167,6 +167,10 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @map_sg: map a scatter-gather list of physically contiguous memory chunks + * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @tlb_range_add: Add a given iova range to the flush queue for this domain + * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush + * queue * to an iommu domain * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping @@ -199,6 +203,10 @@ struct iommu_ops { size_t size); size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot); + void (*flush_iotlb_all)(struct iommu_domain *domain); + void (*iotlb_range_add)(struct iommu_domain *domain, + unsigned long iova, size_t size); + void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); @@ -286,7 +294,9 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size); + size_t size); +extern size_t iommu_unmap_fast(struct iommu_domain *domain, + unsigned long iova, size_t size); extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg,unsigned int nents, int prot); @@ -343,6 +353,25 @@ extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags); +static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +{ + if (domain->ops->flush_iotlb_all) + domain->ops->flush_iotlb_all(domain); +} + +static inline void iommu_tlb_range_add(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + if (domain->ops->iotlb_range_add) + domain->ops->iotlb_range_add(domain, iova, size); +} + +static inline void iommu_tlb_sync(struct iommu_domain *domain) +{ + if (domain->ops->iotlb_sync) + domain->ops->iotlb_sync(domain); +} + static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) @@ -436,6 +465,12 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } +static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, + int gfp_order) +{ + return -ENODEV; +} + static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot) @@ -443,6 +478,19 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return -ENODEV; } +static inline void iommu_flush_tlb_all(struct iommu_domain *domain) +{ +} + +static inline void iommu_tlb_range_add(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ +} + +static inline void iommu_tlb_sync(struct iommu_domain *domain) +{ +} + static inline int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t paddr, u64 size, int prot) -- cgit From 78f35473508118df5ea04b9515ac3f1aaec0a980 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 30 Aug 2017 09:16:38 -0700 Subject: dax: introduce a fs_dax_get_by_bdev() helper Add a helper that can replace the following common pattern: if (blk_queue_dax(bdev->bd_queue)) fs_dax_get_by_host(bdev->bd_disk->disk_name); This will be used to move dax_device lookup from iomap-operation time to fs-mount time. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dan Williams --- include/linux/dax.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index df97b7af7e2c..ac8afa18f707 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -57,6 +57,7 @@ static inline void fs_put_dax(struct dax_device *dax_dev) put_dax(dax_dev); } +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { @@ -71,6 +72,11 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host) static inline void fs_put_dax(struct dax_device *dax_dev) { } + +static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) +{ + return NULL; +} #endif int dax_read_lock(void); -- cgit From c1d2b4c3e204e602c97680335d082b8d012d08cd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:57 +0200 Subject: tcp: Revert "tcp: remove CA_ACK_SLOWPATH" This change was a followup to the header prediction removal, so first revert this as a prerequisite to back out hp removal. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index c614ff135b66..c546d13ffbca 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -910,8 +910,9 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ - CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ + CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ + CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ + CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* -- cgit From 31770e34e43d6c8dee129bfee77e56c34e61f0e5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 30 Aug 2017 19:24:58 +0200 Subject: tcp: Revert "tcp: remove header prediction" This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78. Eric Dumazet says: We found at Google a significant regression caused by 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction In typical RPC (TCP_RR), when a TCP socket receives data, we now call tcp_ack() while we used to not call it. This touches enough cache lines to cause a slowdown. so problem does not seem to be HP removal itself but the tcp_ack() call. Therefore, it might be possible to remove HP after all, provided one finds a way to elide tcp_ack for most cases. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 23 +++++++++++++++++++++++ include/uapi/linux/snmp.h | 2 ++ 3 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 267164a1d559..4aa40ef02d32 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -147,6 +147,12 @@ struct tcp_sock { u16 tcp_header_len; /* Bytes of tcp header to send */ u16 gso_segs; /* Max number of segs per GSO packet */ +/* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __be32 pred_flags; + /* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) diff --git a/include/net/tcp.h b/include/net/tcp.h index c546d13ffbca..9c3db054e47f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b3f346fb9fe3..758f12b58541 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,7 +184,9 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ + LINUX_MIB_TCPHPHITS, /* TCPHPHits */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ + LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ -- cgit From b3cb5388499c5e219324bfe7da2e46cbad82bfcf Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 8 Aug 2017 13:17:00 -0500 Subject: net/mlx5: Skip mlx5_unload_one if mlx5_load_one fails There is an issue where the firmware fails during mlx5_load_one, the health_care timer detects the issue and schedules a health_care call. Then the mlx5_load_one detects the issue, cleans up and quits. Then the health_care starts and calls mlx5_unload_one to clean up the resources that no longer exist and causes kernel panic. The root cause is that the bit MLX5_INTERFACE_STATE_DOWN is not set after mlx5_load_one fails. The solution is removing the bit MLX5_INTERFACE_STATE_DOWN and quit mlx5_unload_one if the bit MLX5_INTERFACE_STATE_UP is not set. The bit MLX5_INTERFACE_STATE_DOWN is redundant and we can use MLX5_INTERFACE_STATE_UP instead. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..918f5e644506 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -673,9 +673,8 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN = BIT(0), - MLX5_INTERFACE_STATE_UP = BIT(1), - MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), + MLX5_INTERFACE_STATE_UP = BIT(0), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(1), }; enum mlx5_pci_status { -- cgit From 10a8d00707082955b177164d4b4e758ffcbd4017 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 9 Aug 2017 10:03:40 -0500 Subject: net/mlx5: Remove the flag MLX5_INTERFACE_STATE_SHUTDOWN MLX5_INTERFACE_STATE_SHUTDOWN is not used in the code. Fixes: 5fc7197d3a25 ("net/mlx5: Add pci shutdown callback") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 918f5e644506..205d82d4c468 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -674,7 +674,6 @@ enum mlx5_device_state { enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = BIT(0), - MLX5_INTERFACE_STATE_SHUTDOWN = BIT(1), }; enum mlx5_pci_status { -- cgit From 7373ae7e8f0bf2c0718422481da986db5058b005 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 29 Aug 2017 22:44:16 -0600 Subject: net: ether: Add support for multiplexing and aggregation type Define the Qualcomm multiplexing and aggregation (MAP) ether type 0x00F9. This is needed for receiving data in the MAP protocol like RMNET. This is not an officially registered ID. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 61f7ccce5b69..9037065e23d0 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -140,6 +140,9 @@ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ #define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ +#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and + * aggregation protocol + */ /* * This is an Ethernet frame header. -- cgit From cdf4969c42a6c1a376dd03a9e846cf638d3cd4b1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 29 Aug 2017 22:44:17 -0600 Subject: net: arp: Add support for raw IP device Define the raw IP type. This is needed for raw IP net devices like rmnet. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index cf73510b9238..a2a635620600 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -59,6 +59,7 @@ #define ARPHRD_LAPB 516 /* LAPB */ #define ARPHRD_DDCMP 517 /* Digital's DDCMP protocol */ #define ARPHRD_RAWHDLC 518 /* Raw HDLC */ +#define ARPHRD_RAWIP 519 /* Raw IP */ #define ARPHRD_TUNNEL 768 /* IPIP tunnel */ #define ARPHRD_TUNNEL6 769 /* IP6IP6 tunnel */ -- cgit From 7d8e8292013ab72ae1f1500cbc91f198ccb1826d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Aug 2017 16:13:25 +0200 Subject: ALSA: Get rid of card power_lock Currently we're taking power_lock at each card component for assuring the power-up sequence, but it doesn't help anything in the implementation at the moment: it just serializes unnecessarily the callers, but it doesn't protect about the power state change itself. It used to have some usefulness in the early days where we managed the PM manually. But now the suspend/resume core procedure is beyond our hands, and power_lock lost its meaning. This patch drops the power_lock from allover the places. There shouldn't be any issues by this change, as it's no helper regarding the power state change. Rather we'll get better performance by removing the serialization; which is the only slight concern of any behavior change, but it can't be a showstopper, after all. Signed-off-by: Takashi Iwai --- include/sound/core.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 357f36b5ee80..4104a9d1001f 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -136,7 +136,6 @@ struct snd_card { #ifdef CONFIG_PM unsigned int power_state; /* power state */ - struct mutex power_lock; /* power lock */ wait_queue_head_t power_sleep; #endif @@ -149,16 +148,6 @@ struct snd_card { #define dev_to_snd_card(p) container_of(p, struct snd_card, card_dev) #ifdef CONFIG_PM -static inline void snd_power_lock(struct snd_card *card) -{ - mutex_lock(&card->power_lock); -} - -static inline void snd_power_unlock(struct snd_card *card) -{ - mutex_unlock(&card->power_lock); -} - static inline unsigned int snd_power_get_state(struct snd_card *card) { return card->power_state; @@ -175,8 +164,6 @@ int snd_power_wait(struct snd_card *card, unsigned int power_state); #else /* ! CONFIG_PM */ -#define snd_power_lock(card) do { (void)(card); } while (0) -#define snd_power_unlock(card) do { (void)(card); } while (0) static inline int snd_power_wait(struct snd_card *card, unsigned int state) { return 0; } #define snd_power_get_state(card) ({ (void)(card); SNDRV_CTL_POWER_D0; }) #define snd_power_change_state(card, state) do { (void)(card); } while (0) -- cgit From 71ccef0df533cd9c8c6cbf1483a636a092088ab9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Aug 2017 00:57:35 +0000 Subject: ASoC: add Component level set_sysclk In current ALSA SoC, Codec only has set_sysclk feature. Codec will be merged into Component in next generation ALSA SoC, thus current Codec specific feature need to be merged into it. This is glue patch for it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index fbf57ff75958..813cdb95e049 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -795,6 +795,10 @@ struct snd_soc_component_driver { int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); + /* component wide operations */ + int (*set_sysclk)(struct snd_soc_component *component, + int clk_id, int source, unsigned int freq, int dir); + /* DT */ int (*of_xlate_dai_name)(struct snd_soc_component *component, struct of_phandle_args *args, @@ -865,6 +869,9 @@ struct snd_soc_component { int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); + int (*set_sysclk)(struct snd_soc_component *component, + int clk_id, int source, unsigned int freq, int dir); + /* machine specific init */ int (*init)(struct snd_soc_component *component); @@ -1459,6 +1466,10 @@ void snd_soc_component_async_complete(struct snd_soc_component *component); int snd_soc_component_test_bits(struct snd_soc_component *component, unsigned int reg, unsigned int mask, unsigned int value); +/* component wide operations */ +int snd_soc_component_set_sysclk(struct snd_soc_component *component, + int clk_id, int source, unsigned int freq, int dir); + #ifdef CONFIG_REGMAP void snd_soc_component_init_regmap(struct snd_soc_component *component, -- cgit From ef641e5d5e6c7a07748239036b786a90ba9b9a95 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Aug 2017 00:57:51 +0000 Subject: ASoC: add Component level set_pll In current ALSA SoC, Codec only has set_pll feature. Codec will be merged into Component in next generation ALSA SoC, thus current Codec specific feature need to be merged into it. This is glue patch for it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 813cdb95e049..23681538c849 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -798,6 +798,8 @@ struct snd_soc_component_driver { /* component wide operations */ int (*set_sysclk)(struct snd_soc_component *component, int clk_id, int source, unsigned int freq, int dir); + int (*set_pll)(struct snd_soc_component *component, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out); /* DT */ int (*of_xlate_dai_name)(struct snd_soc_component *component, @@ -871,6 +873,8 @@ struct snd_soc_component { int (*set_sysclk)(struct snd_soc_component *component, int clk_id, int source, unsigned int freq, int dir); + int (*set_pll)(struct snd_soc_component *component, int pll_id, + int source, unsigned int freq_in, unsigned int freq_out); /* machine specific init */ int (*init)(struct snd_soc_component *component); @@ -1469,6 +1473,9 @@ int snd_soc_component_test_bits(struct snd_soc_component *component, /* component wide operations */ int snd_soc_component_set_sysclk(struct snd_soc_component *component, int clk_id, int source, unsigned int freq, int dir); +int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id, + int source, unsigned int freq_in, + unsigned int freq_out); #ifdef CONFIG_REGMAP -- cgit From 44c07365e9e2c87c7e04d63293618c391d1480ec Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 24 Aug 2017 00:58:07 +0000 Subject: ASoC: add Component level set_jack In current ALSA SoC, Codec only has set_jack feature. Codec will be merged into Component in next generation ALSA SoC, thus current Codec specific feature need to be merged into it. This is glue patch for it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 23681538c849..feb896815069 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -800,6 +800,8 @@ struct snd_soc_component_driver { int clk_id, int source, unsigned int freq, int dir); int (*set_pll)(struct snd_soc_component *component, int pll_id, int source, unsigned int freq_in, unsigned int freq_out); + int (*set_jack)(struct snd_soc_component *component, + struct snd_soc_jack *jack, void *data); /* DT */ int (*of_xlate_dai_name)(struct snd_soc_component *component, @@ -875,6 +877,8 @@ struct snd_soc_component { int clk_id, int source, unsigned int freq, int dir); int (*set_pll)(struct snd_soc_component *component, int pll_id, int source, unsigned int freq_in, unsigned int freq_out); + int (*set_jack)(struct snd_soc_component *component, + struct snd_soc_jack *jack, void *data); /* machine specific init */ int (*init)(struct snd_soc_component *component); -- cgit From 388f79fda74fd3d8700ed5d899573ec58c2e0253 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 30 Aug 2017 02:31:57 -0400 Subject: idr: Add new APIs to support unsigned long The following new APIs are added: int idr_alloc_ext(struct idr *idr, void *ptr, unsigned long *index, unsigned long start, unsigned long end, gfp_t gfp); void *idr_remove_ext(struct idr *idr, unsigned long id); void *idr_find_ext(const struct idr *idr, unsigned long id); void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/idr.h | 69 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/radix-tree.h | 21 ++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index bf70b3ef0a07..7c3a365f7e12 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -80,19 +80,75 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) */ void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); + +int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp, + bool ext); + +/** + * idr_alloc - allocate an id + * @idr: idr handle + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive) + * @gfp: memory allocation flags + * + * Allocates an unused ID in the range [start, end). Returns -ENOSPC + * if there are no unused IDs in that range. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * Simultaneous modifications to the @idr are not allowed and should be + * prevented by the user, usually with a lock. idr_alloc() may be called + * concurrently with read-only accesses to the @idr, such as idr_find() and + * idr_for_each_entry(). + */ +static inline int idr_alloc(struct idr *idr, void *ptr, + int start, int end, gfp_t gfp) +{ + unsigned long id; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + + ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false); + + if (ret) + return ret; + + return id; +} + +static inline int idr_alloc_ext(struct idr *idr, void *ptr, + unsigned long *index, + unsigned long start, + unsigned long end, + gfp_t gfp) +{ + return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true); +} + int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); void *idr_replace(struct idr *, void *, int id); +void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void idr_destroy(struct idr *); -static inline void *idr_remove(struct idr *idr, int id) +static inline void *idr_remove_ext(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id, NULL); } +static inline void *idr_remove(struct idr *idr, int id) +{ + return idr_remove_ext(idr, id); +} + static inline void idr_init(struct idr *idr) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); @@ -128,11 +184,16 @@ static inline void idr_preload_end(void) * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. */ -static inline void *idr_find(const struct idr *idr, int id) +static inline void *idr_find_ext(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id); } +static inline void *idr_find(const struct idr *idr, int id) +{ + return idr_find_ext(idr, id); +} + /** * idr_for_each_entry - iterate over an idr's elements of a given type * @idr: idr handle @@ -145,6 +206,8 @@ static inline void *idr_find(const struct idr *idr, int id) */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) +#define idr_for_each_entry_ext(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id) /** * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 3e5735064b71..567ebb5eaab0 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -357,8 +357,25 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, - gfp_t, int end); + +void __rcu **idr_get_free_cmn(struct radix_tree_root *root, + struct radix_tree_iter *iter, gfp_t gfp, + unsigned long max); +static inline void __rcu **idr_get_free(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + int end) +{ + return idr_get_free_cmn(root, iter, gfp, end > 0 ? end - 1 : INT_MAX); +} + +static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + unsigned long end) +{ + return idr_get_free_cmn(root, iter, gfp, end - 1); +} enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ -- cgit From 65a206c01e8e7ffe971477a36419422099216eff Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 30 Aug 2017 02:31:59 -0400 Subject: net/sched: Change act_api and act_xxx modules to use IDR Typically, each TC filter has its own action. All the actions of the same type are saved in its hash table. But the hash buckets are too small that it degrades to a list. And the performance is greatly affected. For example, it takes about 0m11.914s to insert 64K rules. If we convert the hash table to IDR, it only takes about 0m1.500s. The improvement is huge. But please note that the test result is based on previous patch that cls_flower uses IDR. Signed-off-by: Chris Mi Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 76 ++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 26ffd8333f50..8f3d5d8b5ae0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,12 +10,9 @@ #include #include - -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; +struct tcf_idrinfo { + spinlock_t lock; + struct idr action_idr; }; struct tc_action_ops; @@ -25,9 +22,8 @@ struct tc_action { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; struct list_head list; - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; - struct hlist_node tcfa_head; u32 tcfa_index; int tcfa_refcnt; int tcfa_bindcnt; @@ -44,7 +40,6 @@ struct tc_action { struct tc_cookie *act_cookie; struct tcf_chain *goto_chain; }; -#define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt @@ -57,27 +52,6 @@ struct tc_action { #define tcf_lock common.tcfa_lock #define tcf_rcu common.tcfa_rcu -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) -{ - return index & hmask; -} - -static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) -{ - int i; - - spin_lock_init(&hf->lock); - hf->index = 0; - hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!hf->htab) - return -ENOMEM; - for (i = 0; i < mask + 1; i++) - INIT_HLIST_HEAD(&hf->htab[i]); - return 0; -} - /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -126,53 +100,51 @@ struct tc_action_ops { }; struct tc_action_net { - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; }; static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, unsigned int mask) + const struct tc_action_ops *ops) { int err = 0; - tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL); - if (!tn->hinfo) + tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - err = tcf_hashinfo_init(tn->hinfo, mask); - if (err) - kfree(tn->hinfo); + spin_lock_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); return err; } -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo); +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo); static inline void tc_action_net_exit(struct tc_action_net *tn) { - tcf_hashinfo_destroy(tn->ops, tn->hinfo); - kfree(tn->hinfo); + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, int bind, - bool cpustats); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats); +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); -int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); +int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -static inline int tcf_hash_release(struct tc_action *a, bool bind) +static inline int tcf_idr_release(struct tc_action *a, bool bind) { - return __tcf_hash_release(a, bind, false); + return __tcf_idr_release(a, bind, false); } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); -- cgit From 5c23f2dc8eeb5a6010cb66119d942361bc8ec833 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 30 Aug 2017 10:29:12 +0200 Subject: phy: add sgmii and 10gkr modes to the phy_mode enum This patch adds more generic PHY modes to the phy_mode enum, to allow configuring generic PHYs to the SGMII and/or the 10GKR mode by using the set_mode callback. Signed-off-by: Antoine Tenart Acked-by: Kishon Vijay Abraham I Signed-off-by: David S. Miller --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 78bb0d7f6b11..e694d4008c4a 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -27,6 +27,8 @@ enum phy_mode { PHY_MODE_USB_HOST, PHY_MODE_USB_DEVICE, PHY_MODE_USB_OTG, + PHY_MODE_SGMII, + PHY_MODE_10GKR, }; /** -- cgit From 2729984149e6a23e849a40e16fc3efdc07dd3668 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 13 Aug 2017 13:34:42 +0300 Subject: net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels Add TX offloads support for GRE tunneled packets by reporting the needed netdev features. Signed-off-by: Gal Pressman Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7d09b9c52f..3d5d32e5446c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -602,7 +602,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; - u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; u8 swp[0x1]; -- cgit From e3bfed1df379c18f20feb06427d952b766e2c00f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 25 Aug 2017 19:53:39 +1000 Subject: KVM: PPC: Book3S HV: Report storage key support to userspace This adds information about storage keys to the struct returned by the KVM_PPC_GET_SMMU_INFO ioctl. The new fields replace a pad field, which was zeroed by previous kernel versions. Thus userspace that knows about the new fields will see zeroes when running on an older kernel, indicating that storage keys are not supported. The size of the structure has not changed. The number of keys is hard-coded for the CPUs supported by HV KVM, which is just POWER7, POWER8 and POWER9. Signed-off-by: Paul Mackerras Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- include/uapi/linux/kvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6cd63c18708a..838887587411 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { struct kvm_ppc_smmu_info { __u64 flags; __u32 slb_size; - __u32 pad; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; -- cgit From 47ebcc0bb1d5eb7f1b1eeab675409ea7f67b4a5c Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Wed, 30 Aug 2017 11:30:39 +0300 Subject: xfrm: Add support for network devices capable of removing the ESP trailer In conjunction with crypto offload [1], removing the ESP trailer by hardware can potentially improve the performance by avoiding (1) a cache miss incurred by reading the nexthdr field and (2) the necessity to calculate the csum value of the trailer in order to keep skb->csum valid. This patch introduces the changes to the xfrm stack and merely serves as an infrastructure. Subsequent patch to mlx5 driver will put this to a good use. [1] https://www.mail-archive.com/netdev@vger.kernel.org/msg175733.html Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c7b70cce6d6..f002a2c5e33c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1019,6 +1019,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 +#define XFRM_ESP_NO_TRAILER 64 __u32 status; #define CRYPTO_SUCCESS 1 -- cgit From 6606bc9dee63ad8cda2cc310d2ad5992673a785a Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 17 Aug 2017 14:50:36 +0800 Subject: pinctrl: Add sleep related state to indicate sleep related configs In some scenarios, we should set some pins as input/output/pullup/pulldown when the specified system goes into deep sleep mode, then when the system goes into deep sleep mode, these pins will be set automatically by hardware. That means some pins are not controlled by any specific driver in the OS, but need to be controlled when entering sleep mode. Thus we introduce one sleep state config into pinconf-generic for users to configure. Signed-off-by: Baolin Wang Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index e91d1b6a260d..5d8bc7f21c2a 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -86,6 +86,7 @@ * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. + * @PIN_CONFIG_SLEEP_HARDWARE_STATE: indicate this is sleep related state. * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to * this parameter (on a custom format) tells the driver which alternative * slew rate to use. @@ -114,6 +115,7 @@ enum pin_config_param { PIN_CONFIG_OUTPUT_ENABLE, PIN_CONFIG_OUTPUT, PIN_CONFIG_POWER_SOURCE, + PIN_CONFIG_SLEEP_HARDWARE_STATE, PIN_CONFIG_SLEW_RATE, PIN_CONFIG_END = 0x7F, PIN_CONFIG_MAX = 0xFF, -- cgit From 72f9b089ecd2cc2194d27cbb14fd80a0b1472e89 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Tue, 29 Aug 2017 15:51:29 -0700 Subject: RDMA/vmw_pvrdma: Report network header type in WC We should report the network header type in the work completion so that the kernel can infer the right RoCE type headers. Reviewed-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- include/uapi/rdma/vmw_pvrdma-abi.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c8c1d2d6df4d..c6569b0032ec 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -125,7 +125,8 @@ enum pvrdma_wc_flags { PVRDMA_WC_IP_CSUM_OK = 1 << 3, PVRDMA_WC_WITH_SMAC = 1 << 4, PVRDMA_WC_WITH_VLAN = 1 << 5, - PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN, + PVRDMA_WC_WITH_NETWORK_HDR_TYPE = 1 << 6, + PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; struct pvrdma_alloc_ucontext_resp { @@ -283,7 +284,8 @@ struct pvrdma_cqe { __u8 dlid_path_bits; __u8 port_num; __u8 smac[6]; - __u8 reserved2[7]; /* Pad to next power of 2 (64). */ + __u8 network_hdr_type; + __u8 reserved2[6]; /* Pad to next power of 2 (64). */ }; #endif /* __VMW_PVRDMA_ABI_H__ */ -- cgit From fac9658cabb98afb68ef1630c558864e6f559c07 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:57 +0300 Subject: IB/core: Add new ioctl interface In this ioctl interface, processing the command starts from properties of the command and fetching the appropriate user objects before calling the handler. Parsing and validation is done according to a specifier declared by the driver's code. In the driver, all supported objects are declared. These objects are separated to different object namepsaces. Dividing objects to namespaces is done at initialization by using the higher bits of the object ids. This initialization can mix objects declared in different places to one parsing tree using in this ioctl interface. For each object we list all supported methods. Similarly to objects, methods are separated to method namespaces too. Namespacing is done similarly to the objects case. This could be used in order to add methods to an existing object. Each method has a specific handler, which could be either a default handler or a driver specific handler. Along with the handler, a bunch of attributes are specified as well. Similarly to objects and method, attributes are namespaced and hashed by their ids at initialization too. All supported attributes are subject to automatic fetching and validation. These attributes include the command, response and the method's related objects' ids. When these entities (objects, methods and attributes) are used, the high bits of the entities ids are used in order to calculate the hash bucket index. Then, these high bits are masked out in order to have a zero based index. Since we use these high bits for both bucketing and namespacing, we get a compact representation and O(1) array access. This is mandatory for efficient dispatching. Each attribute has a type (PTR_IN, PTR_OUT, IDR and FD) and a length. Attributes could be validated through some attributes, like: (*) Minimum size / Exact size (*) Fops for FD (*) Object type for IDR If an IDR/fd attribute is specified, the kernel also states the object type and the required access (NEW, WRITE, READ or DESTROY). All uobject/fd management is done automatically by the infrastructure, meaning - the infrastructure will fail concurrent commands that at least one of them requires concurrent access (WRITE/DESTROY), synchronize actions with device removals (dissociate context events) and take care of reference counting (increase/decrease) for concurrent actions invocation. The reference counts on the actual kernel objects shall be handled by the handlers. objects +--------+ | | | | methods +--------+ | | ns method method_spec +-----+ |len | +--------+ +------+[d]+-------+ +----------------+[d]+------------+ |attr1+-> |type | | object +> |method+-> | spec +-> + attr_buckets +-> |default_chain+--> +-----+ |idr_type| +--------+ +------+ |handler| | | +------------+ |attr2| |access | | | | | +-------+ +----------------+ |driver chain| +-----+ +--------+ | | | | +------------+ | | +------+ | | | | | | | | | | | | | | | | | | | | +--------+ [d] = Hash ids to groups using the high order bits The right types table is also chosen by using the high bits from the ids. Currently we have either default or driver specific groups. Once validation and object fetching (or creation) completed, we call the handler: int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); ctx bundles attributes of different namespaces. Each element there is an array of attributes which corresponds to one namespaces of attributes. For example, in the usually used case: ctx core +----------------------------+ +------------+ | core: +---> | valid | +----------------------------+ | cmd_attr | | driver: | +------------+ |----------------------------+--+ | valid | | | cmd_attr | | +------------+ | | valid | | | obj_attr | | +------------+ | | drivers | +------------+ +> | valid | | cmd_attr | +------------+ | valid | | cmd_attr | +------------+ | valid | | obj_attr | +------------+ Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 + include/rdma/uverbs_ioctl.h | 101 ++++++++++++++++++++++++++++++++---- include/uapi/rdma/rdma_user_ioctl.h | 33 ++++++++++++ 3 files changed, 127 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1b4bb8743969..e6df68048517 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2348,6 +2348,8 @@ struct ib_device { void (*get_dev_fw_str)(struct ib_device *, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); + + struct uverbs_root_spec *specs_root; }; struct ib_client { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index d3ec02b7d937..f83f56329761 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -43,6 +43,8 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, }; @@ -54,29 +56,110 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; +enum { + UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, + /* Support extending attributes by length */ + UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1, +}; + struct uverbs_attr_spec { enum uverbs_attr_type type; - struct { - /* - * higher bits mean the namespace and lower bits mean - * the type id within the namespace. - */ - u16 obj_type; - u8 access; - } obj; + union { + u16 len; + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; + }; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; }; struct uverbs_attr_spec_hash { size_t num_attrs; + unsigned long *mandatory_attrs_bitmask; struct uverbs_attr_spec attrs[0]; }; +struct uverbs_attr_bundle; +struct ib_uverbs_file; + +enum { + /* + * Action marked with this flag creates a context (or root for all + * objects). + */ + UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0, +}; + +struct uverbs_method_spec { + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_buckets; + size_t num_child_attrs; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + struct uverbs_attr_spec_hash *attr_buckets[0]; +}; + +struct uverbs_method_spec_hash { + size_t num_methods; + struct uverbs_method_spec *methods[0]; +}; + +struct uverbs_object_spec { + const struct uverbs_obj_type *type_attrs; + size_t num_buckets; + struct uverbs_method_spec_hash *method_buckets[0]; +}; + +struct uverbs_object_spec_hash { + size_t num_objects; + struct uverbs_object_spec *objects[0]; +}; + +struct uverbs_root_spec { + size_t num_buckets; + struct uverbs_object_spec_hash *object_buckets[0]; +}; + +/* ================================================= + * Parsing infrastructure + * ================================================= + */ + +struct uverbs_ptr_attr { + union { + u64 data; + void __user *ptr; + }; + u16 len; + /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ + u16 flags; +}; + struct uverbs_obj_attr { + /* pointer to the kernel descriptor -> type, access, etc */ + const struct uverbs_obj_type *type; struct ib_uobject *uobject; + /* fd or id in idr of this object */ + int id; }; struct uverbs_attr { - struct uverbs_obj_attr obj_attr; + /* + * pointer to the user-space given attribute, in order to write the + * new uobject's id or update flags. + */ + struct ib_uverbs_attr __user *uattr; + union { + struct uverbs_ptr_attr ptr_attr; + struct uverbs_obj_attr obj_attr; + }; }; struct uverbs_attr_bundle_hash { diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 9388125ad51b..165a27e969d5 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -43,6 +43,39 @@ /* Legacy name, for user space application which already use it */ #define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC +#define RDMA_VERBS_IOCTL \ + _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) + +#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +enum { + /* User input */ + UVERBS_ATTR_F_MANDATORY = 1U << 0, + /* + * Valid output bit should be ignored and considered set in + * mandatory fields. This bit is kernel output. + */ + UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1, +}; + +struct ib_uverbs_attr { + __u16 attr_id; /* command specific type attribute */ + __u16 len; /* only for pointers */ + __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ + __u16 reserved; + __u64 data; /* ptr to command, inline data or idr/fd */ +}; + +struct ib_uverbs_ioctl_hdr { + __u16 length; + __u16 object_id; + __u16 method_id; + __u16 num_attrs; + __u64 reserved; + struct ib_uverbs_attr attrs[0]; +}; + /* * General blocks assignments * It is closed on purpose do not expose it it user space -- cgit From 5009010fbf54bdc27e57baca490e1f9d6a4609e0 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:58 +0300 Subject: IB/core: Declare an object instead of declaring only type attributes Switch all uverbs_type_attrs_xxxx with DECLARE_UVERBS_OBJECT macros. This will be later used in order to embed the object specific methods in the objects as well. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 16 ++++++++++++++++ include/rdma/uverbs_std_types.h | 40 ++++++++++++++++++++-------------------- include/rdma/uverbs_types.h | 38 +++++++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index f83f56329761..99130083615e 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -127,6 +127,22 @@ struct uverbs_root_spec { struct uverbs_object_spec_hash *object_buckets[0]; }; +/* + * ======================================= + * Verbs definitions + * ======================================= + */ + +struct uverbs_object_def { + const struct uverbs_obj_type *type_attrs; +}; + +#define _UVERBS_OBJECT(_id, _type_attrs, ...) \ + ((const struct uverbs_object_def) { \ + .type_attrs = _type_attrs}) +#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ + const struct uverbs_object_def _name = \ + _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) /* ================================================= * Parsing infrastructure * ================================================= diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 7771ce966952..eda271b4aa6c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -35,18 +35,18 @@ #include -extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd; +extern const struct uverbs_object_def uverbs_object_comp_channel; +extern const struct uverbs_object_def uverbs_object_cq; +extern const struct uverbs_object_def uverbs_object_qp; +extern const struct uverbs_object_def uverbs_object_rwq_ind_table; +extern const struct uverbs_object_def uverbs_object_wq; +extern const struct uverbs_object_def uverbs_object_srq; +extern const struct uverbs_object_def uverbs_object_ah; +extern const struct uverbs_object_def uverbs_object_flow; +extern const struct uverbs_object_def uverbs_object_mr; +extern const struct uverbs_object_def uverbs_object_mw; +extern const struct uverbs_object_def uverbs_object_pd; +extern const struct uverbs_object_def uverbs_object_xrcd; static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, @@ -56,22 +56,22 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, return rdma_lookup_get_uobject(type, ucontext, id, write); } -#define uobj_get_type(_type) uverbs_type_attrs_##_type.type +#define uobj_get_type(_object) uverbs_object_##_object.type_attrs #define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(&(_type), false, _ucontext, _id) + __uobj_get(_type, false, _ucontext, _id) -#define uobj_get_obj_read(_type, _id, _ucontext) \ +#define uobj_get_obj_read(_object, _id, _ucontext) \ ({ \ - struct ib_uobject *uobj = \ - __uobj_get(&uobj_get_type(_type), \ + struct ib_uobject *__uobj = \ + __uobj_get(uverbs_object_##_object.type_attrs, \ false, _ucontext, _id); \ \ - (struct ib_##_type *)(IS_ERR(uobj) ? NULL : uobj->object); \ + (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ }) #define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(&(_type), true, _ucontext, _id) + __uobj_get(_type, true, _ucontext, _id) static inline void uobj_put_read(struct ib_uobject *uobj) { @@ -108,7 +108,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type } #define uobj_alloc(_type, ucontext) \ - __uobj_alloc(&(_type), ucontext) + __uobj_alloc(_type, ucontext) #endif diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 351ea185df44..9760b6d70744 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -151,22 +151,30 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_size, _order) \ - { \ - .destroy_order = _order, \ - .type_class = &uverbs_fd_class, \ - .obj_size = (_size) + \ - UVERBS_BUILD_BUG_ON((_size) < \ - sizeof(struct ib_uobject_file)),\ - } -#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \ - { \ +#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\ + ((&((const struct uverbs_obj_fd_type) \ + {.type = { \ + .destroy_order = _order, \ + .type_class = &uverbs_fd_class, \ + .obj_size = (_obj_size) + \ + UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ + }, \ + .context_closed = _context_closed, \ + .fops = _fops, \ + .name = _name, \ + .flags = _flags}))->type) +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \ + ((&((const struct uverbs_obj_idr_type) \ + {.type = { \ .destroy_order = _order, \ .type_class = &uverbs_idr_class, \ .obj_size = (_size) + \ - UVERBS_BUILD_BUG_ON((_size) < \ - sizeof(struct ib_uobject)), \ - } -#define UVERBS_TYPE_ALLOC_IDR(_order) \ - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order) + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject)) \ + }, \ + .destroy_object = _destroy_object,}))->type) +#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \ + _destroy_object) + #endif -- cgit From 09e3ebf8c193d3f154c4ffb7cb18995df0243bc6 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:59 +0300 Subject: IB/core: Add DEVICE object and root tree structure This adds the DEVICE object. This object supports creating the context that all objects are created from. Moreover, it supports executing methods which are related to the device itself, such as QUERY_DEVICE. This is a singleton object (per file instance). All standard objects are put in the root structure. This root will later on be used in drivers as the source for their whole parsing tree. Later on, when new features are added, these drivers could mix this root with other customized objects. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 35 +++++++++++++++++++++++++++++++++++ include/rdma/uverbs_std_types.h | 18 ++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 99130083615e..2e8925434d74 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -133,16 +133,51 @@ struct uverbs_root_spec { * ======================================= */ +struct uverbs_attr_def { + u16 id; + struct uverbs_attr_spec attr; +}; + +struct uverbs_method_def { + u16 id; + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_attrs; + const struct uverbs_attr_def * const (*attrs)[]; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); +}; + struct uverbs_object_def { + u16 id; const struct uverbs_obj_type *type_attrs; + size_t num_methods; + const struct uverbs_method_def * const (*methods)[]; +}; + +struct uverbs_object_tree_def { + size_t num_objects; + const struct uverbs_object_def * const (*objects)[]; }; #define _UVERBS_OBJECT(_id, _type_attrs, ...) \ ((const struct uverbs_object_def) { \ + .id = _id, \ .type_attrs = _type_attrs}) #define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ const struct uverbs_object_def _name = \ _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) +#define _UVERBS_TREE_OBJECTS_SZ(...) \ + (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \ + sizeof(const struct uverbs_object_def *)) +#define _UVERBS_OBJECT_TREE(...) \ + ((const struct uverbs_object_tree_def) { \ + .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \ + .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} }) +#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ + const struct uverbs_object_tree_def _name = \ + _UVERBS_OBJECT_TREE(__VA_ARGS__) + /* ================================================= * Parsing infrastructure * ================================================= diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index eda271b4aa6c..bef74099b7c5 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -35,6 +35,23 @@ #include +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_LAST, +}; + extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; extern const struct uverbs_object_def uverbs_object_qp; @@ -47,6 +64,7 @@ extern const struct uverbs_object_def uverbs_object_mr; extern const struct uverbs_object_def uverbs_object_mw; extern const struct uverbs_object_def uverbs_object_pd; extern const struct uverbs_object_def uverbs_object_xrcd; +extern const struct uverbs_object_def uverbs_object_device; static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, -- cgit From 118620d3686b2d624f9a5019f2f14c64cf50d21a Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:00 +0300 Subject: IB/core: Add uverbs merge trees functionality Different drivers support different features and even subset of the common uverbs implementation. Currently, this is handled as bitmask in every driver that represents which kind of methods it supports, but doesn't go down to attributes granularity. Moreover, drivers might want to add their specific types, methods and attributes to let their user-space counter-parts be exposed to some more efficient abstractions. It means that existence of different features is validated syntactically via the parsing infrastructure rather than using a complex in-handler logic. In order to do that, we allow defining features and abstractions as parsing trees. These per-feature parsing tree could be merged to an efficient (perfect-hash based) parsing tree, which is later used by the parsing infrastructure. To sum it up, this makes a parse tree unique for a device and represents only the features this particular device supports. This is done by having a root specification tree per feature. Before a device registers itself as an IB device, it merges all these trees into one parsing tree. This parsing tree is used to parse all user-space commands. A future user-space application could read this parse tree. This tree represents which objects, methods and attributes are supported by this device. This is based on the idea of Jason Gunthorpe Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 2e8925434d74..cf5b238d2d81 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -235,5 +235,43 @@ static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_ return test_bit(idx, attrs_hash->valid_bitmap); } -#endif +/* ================================================= + * Definitions -> Specs infrastructure + * ================================================= + */ + +/* + * uverbs_alloc_spec_tree - Merges different common and driver specific feature + * into one parsing tree that every uverbs command will be parsed upon. + * + * @num_trees: Number of trees in the array @trees. + * @trees: Array of pointers to tree root definitions to merge. Each such tree + * possibly contains objects, methods and attributes definitions. + * + * Returns: + * uverbs_root_spec *: The root of the merged parsing tree. + * On error, we return an error code. Error is checked via IS_ERR. + * + * The following merges could take place: + * a. Two trees representing the same method with different handler + * -> We take the handler of the tree that its handler != NULL + * and its index in the trees array is greater. The incentive for that + * is that developers are expected to first merge common trees and then + * merge trees that gives specialized the behaviour. + * b. Two trees representing the same object with different + * type_attrs (struct uverbs_obj_type): + * -> We take the type_attrs of the tree that its type_attr != NULL + * and its index in the trees array is greater. This could be used + * in order to override the free function, allocation size, etc. + * c. Two trees representing the same method attribute (same id but possibly + * different attributes): + * -> ERROR (-ENOENT), we believe that's not the programmer's intent. + * + * An object without any methods is considered invalid and will abort the + * function with -ENOENT error. + */ +struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_object_tree_def **trees); +void uverbs_free_spec_tree(struct uverbs_root_spec *root); +#endif -- cgit From 3541030650c0ddb5d52163082fee427b2a453799 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:01 +0300 Subject: IB/core: Add macros for declaring methods and attributes This patch adds macros for declaring objects, methods and attributes. These definitions are later used by downstream patches to declare some of the default types. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 105 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index cf5b238d2d81..9a8d217cdc1d 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -34,6 +34,8 @@ #define _UVERBS_IOCTL_ #include +#include +#include /* * ======================================= @@ -160,10 +162,99 @@ struct uverbs_object_tree_def { const struct uverbs_object_def * const (*objects)[]; }; +#define UA_FLAGS(_flags) .flags = _flags +#define __UVERBS_ATTR0(_id, _len, _type, ...) \ + ((const struct uverbs_attr_def) \ + {.id = _id, .attr = {.type = _type, {.len = _len}, .flags = 0, } }) +#define __UVERBS_ATTR1(_id, _len, _type, _flags) \ + ((const struct uverbs_attr_def) \ + {.id = _id, .attr = {.type = _type, {.len = _len}, _flags, } }) +#define __UVERBS_ATTR(_id, _len, _type, _flags, _n, ...) \ + __UVERBS_ATTR##_n(_id, _len, _type, _flags) +/* + * In new compiler, UVERBS_ATTR could be simplified by declaring it as + * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__} + * But since we support older compilers too, we need the more complex code. + */ +#define UVERBS_ATTR(_id, _len, _type, ...) \ + __UVERBS_ATTR(_id, _len, _type, ##__VA_ARGS__, 1, 0) +#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ + UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_IN, ##__VA_ARGS__) +/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */ +#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ + UVERBS_ATTR_PTR_IN_SZ(_id, sizeof(_type), ##__VA_ARGS__) +#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ + UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_OUT, ##__VA_ARGS__) +#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ + UVERBS_ATTR_PTR_OUT_SZ(_id, sizeof(_type), ##__VA_ARGS__) + +/* + * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring + * it as + * {.id = _id, \ + * .attr {.type = __obj_class, \ + * .obj = {.obj_type = _idr_type, \ + * .access = _access \ + * }, ##__VA_ARGS__ } } + * But since we support older compilers too, we need the more complex code. + */ +#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\ + ((const struct uverbs_attr_def) \ + {.id = _id, \ + .attr = {.type = _obj_class, \ + {.obj = {.obj_type = _obj_type, .access = _access } },\ + .flags = 0} }) +#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\ + ((const struct uverbs_attr_def) \ + {.id = _id, \ + .attr = {.type = _obj_class, \ + {.obj = {.obj_type = _obj_type, .access = _access} }, \ + _flags} }) +#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \ + _n, ...) \ + ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags) +#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...) \ + ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, \ + ##__VA_ARGS__, 1, 0) +#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \ + __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\ + ##__VA_ARGS__) +#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) \ + __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type, \ + (_access) + BUILD_BUG_ON_ZERO( \ + (_access) != UVERBS_ACCESS_NEW && \ + (_access) != UVERBS_ACCESS_READ), \ + ##__VA_ARGS__) +#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \ + const struct uverbs_attr_def _name = __VA_ARGS__ + +#define _UVERBS_METHOD_ATTRS_SZ(...) \ + (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\ + sizeof(const struct uverbs_attr_def *)) +#define _UVERBS_METHOD(_id, _handler, _flags, ...) \ + ((const struct uverbs_method_def) { \ + .id = _id, \ + .flags = _flags, \ + .handler = _handler, \ + .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__), \ + .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} }) +#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...) \ + const struct uverbs_method_def _name = \ + _UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__) +#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...) \ + const struct uverbs_method_def _name = \ + _UVERBS_METHOD(_id, _handler, \ + UVERBS_ACTION_FLAG_CREATE_ROOT, \ + ##__VA_ARGS__) +#define _UVERBS_OBJECT_METHODS_SZ(...) \ + (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \ + sizeof(const struct uverbs_method_def *)) #define _UVERBS_OBJECT(_id, _type_attrs, ...) \ ((const struct uverbs_object_def) { \ .id = _id, \ - .type_attrs = _type_attrs}) + .type_attrs = _type_attrs, \ + .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__), \ + .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} }) #define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ const struct uverbs_object_def _name = \ _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) @@ -235,6 +326,18 @@ static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_ return test_bit(idx, attrs_hash->valid_bitmap); } +static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, + unsigned int idx) +{ + u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; + + if (attrs_bundle->num_buckets <= idx_bucket) + return false; + + return uverbs_attr_is_valid_in_hash(&attrs_bundle->hash[idx_bucket], + idx & ~UVERBS_ID_NS_MASK); +} + /* ================================================= * Definitions -> Specs infrastructure * ================================================= -- cgit From 4da70da23e9ba03f7f9e067fbe0eec6ebbfee401 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:02 +0300 Subject: IB/core: Explicitly destroy an object while keeping uobject When some objects are destroyed, we need to extract their status at destruction. After object's destruction, this status (e.g. events_reported) relies in the uobject. In order to have the latest and correct status, the underlying object should be destroyed, but we should keep the uobject alive and read this information off the uobject. We introduce a rdma_explicit_destroy function. This function destroys the class type object (for example, the IDR class type which destroys the underlying object as well) and then convert the uobject to be of a null class type. This uobject will then be destroyed as any other uobject once uverbs_finalize_object[s] is called. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 9760b6d70744..cc04ec65588d 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -129,6 +129,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int rdma_alloc_commit_uobject(struct ib_uobject *uobj); +int rdma_explicit_destroy(struct ib_uobject *uobject); struct uverbs_obj_fd_type { /* -- cgit From 64b19e1323e96c34af7ca90d1954e70890c7a98e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:03 +0300 Subject: IB/core: Export ioctl enum types to user-space Add a new ib_user_ioctl_verbs.h which exports all required ABI enums and structs to the user-space. Export the default types to user-space through this file. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_std_types.h | 18 +---------- include/uapi/rdma/ib_user_ioctl_verbs.h | 54 +++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 include/uapi/rdma/ib_user_ioctl_verbs.h (limited to 'include') diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index bef74099b7c5..400efe2a4d3c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -34,23 +34,7 @@ #define _UVERBS_STD_TYPES__ #include - -enum uverbs_default_objects { - UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ - UVERBS_OBJECT_PD, - UVERBS_OBJECT_COMP_CHANNEL, - UVERBS_OBJECT_CQ, - UVERBS_OBJECT_QP, - UVERBS_OBJECT_SRQ, - UVERBS_OBJECT_AH, - UVERBS_OBJECT_MR, - UVERBS_OBJECT_MW, - UVERBS_OBJECT_FLOW, - UVERBS_OBJECT_XRCD, - UVERBS_OBJECT_RWQ_IND_TBL, - UVERBS_OBJECT_WQ, - UVERBS_OBJECT_LAST, -}; +#include extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h new file mode 100644 index 000000000000..78a2e5be4d6e --- /dev/null +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IB_USER_IOCTL_VERBS_H +#define IB_USER_IOCTL_VERBS_H + +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_LAST, +}; + +#endif + -- cgit From d70724f149b107f8e4062320270d3d8b6713a1bb Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:04 +0300 Subject: IB/core: Add legacy driver's user-data In this phase, we don't want to change all the drivers to use flexible driver's specific attributes. Therefore, we add two default attributes: UHW_IN and UHW_OUT. These attributes are optional in some methods and they encode the driver specific command data. We add a function that extract this data and creates the legacy udata over it. Driver's data should start from UVERBS_UDATA_DRIVER_DATA_FLAG. This turns on the first bit of the namespace, indicating this attribute belongs to the driver's namespace. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 46 +++++++++++++++++++++++++++++++++ include/uapi/rdma/ib_user_ioctl_verbs.h | 10 +++++++ 2 files changed, 56 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9a8d217cdc1d..759afa0621ea 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -36,6 +36,7 @@ #include #include #include +#include /* * ======================================= @@ -338,6 +339,51 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b idx & ~UVERBS_ID_NS_MASK); } +static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; + + if (!uverbs_attr_is_valid(attrs_bundle, idx)) + return ERR_PTR(-ENOENT); + + return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; +} + +static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, const void *from) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + u16 flags; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; + return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) && + !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT; +} + +static inline int _uverbs_copy_from(void *to, size_t to_size, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data)) + memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); + else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) + return -EFAULT; + + return 0; +} + +#define uverbs_copy_from(to, attrs_bundle, idx) \ + _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx) + /* ================================================= * Definitions -> Specs infrastructure * ================================================= diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 78a2e5be4d6e..90f81eeca35b 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -33,6 +33,11 @@ #ifndef IB_USER_IOCTL_VERBS_H #define IB_USER_IOCTL_VERBS_H +#include + +#define UVERBS_UDATA_DRIVER_DATA_NS 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) + enum uverbs_default_objects { UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ UVERBS_OBJECT_PD, @@ -50,5 +55,10 @@ enum uverbs_default_objects { UVERBS_OBJECT_LAST, }; +enum { + UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_UHW_OUT, +}; + #endif -- cgit From 9ee79fce364216df35ec46e26d20780c3c1644cc Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:05 +0300 Subject: IB/core: Add completion queue (cq) object actions Adding CQ ioctl actions: 1. create_cq 2. destroy_cq This requires adding the following: 1. A specification describing the method a. Handler b. Attributes specification Each attribute is one of the following: a. PTR_IN - input data Note: This could be encoded inlined for data < 64bit b. PTR_OUT - response data c. IDR - idr based object d. FD - fd based object Blobs attributes (clauses a and b) contain their type, while objects specifications (clauses c and d) contains the expected object type (for example, the given id should be UVERBS_TYPE_PD) and the required access (READ, WRITE, NEW or DESTROY). If a NEW is required, the new object's id will be assigned to this attribute. All attributes could get UA_FLAGS attribute. Currently we support stating that an attribute is mandatory or that the specification size corresponds to a lower bound (and that this attribute could be extended). We currently add both default attributes and the two generic UHW_IN and UHW_OUT driver specific attributes. 2. Handler A handler gets a uverbs_attr_bundle. The handler developer uses uverbs_attr_get to fetch an attribute of a given id. Each of these attribute groups correspond to the specification group defined in the action (clauses 1.b and 1.c respectively). The indices of these arrays corresponds to the attribute ids declared in the specifications (clause 2). The handler is quite simple. It assumes the infrastructure fetched all objects and locked, created or destroyed them as required by the specification. Pointer (or blob) attributes were validated to match their required sizes. After the handler finished, the infrastructure commits or rollbacks the objects. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/uapi/rdma/ib_user_ioctl_verbs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 90f81eeca35b..842792eae383 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -60,5 +60,25 @@ enum { UVERBS_UHW_OUT, }; +enum uverbs_create_cq_cmd_attr_ids { + CREATE_CQ_HANDLE, + CREATE_CQ_CQE, + CREATE_CQ_USER_HANDLE, + CREATE_CQ_COMP_CHANNEL, + CREATE_CQ_COMP_VECTOR, + CREATE_CQ_FLAGS, + CREATE_CQ_RESP_CQE, +}; + +enum uverbs_destroy_cq_cmd_attr_ids { + DESTROY_CQ_HANDLE, + DESTROY_CQ_RESP, +}; + +enum uverbs_actions_cq_ops { + UVERBS_CQ_CREATE, + UVERBS_CQ_DESTROY, +}; + #endif -- cgit From 524271129401ed896dc76e49acdbafc506cb41ac Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:06 +0300 Subject: IB/core: Assign root to all drivers In order to use the parsing tree, we need to assign the root to all drivers. Currently, we just assign the default parsing tree via ib_uverbs_add_one. The driver could override this by assigning a parsing tree prior to registering the device. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/uverbs_ioctl.h | 12 ++++++++++++ include/rdma/uverbs_std_types.h | 14 ++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 759afa0621ea..6da44079aa58 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -419,8 +419,20 @@ static inline int _uverbs_copy_from(void *to, size_t to_size, * An object without any methods is considered invalid and will abort the * function with -ENOENT error. */ +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, const struct uverbs_object_tree_def **trees); void uverbs_free_spec_tree(struct uverbs_root_spec *root); +#else +static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_object_tree_def **trees) +{ + return NULL; +} + +static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root) +{ +} +#endif #endif diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 400efe2a4d3c..5f8e20bbd67c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -34,8 +34,10 @@ #define _UVERBS_STD_TYPES__ #include +#include #include +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; extern const struct uverbs_object_def uverbs_object_qp; @@ -50,6 +52,18 @@ extern const struct uverbs_object_def uverbs_object_pd; extern const struct uverbs_object_def uverbs_object_xrcd; extern const struct uverbs_object_def uverbs_object_device; +extern const struct uverbs_object_tree_def uverbs_default_objects; +static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) +{ + return &uverbs_default_objects; +} +#else +static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) +{ + return NULL; +} +#endif + static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, struct ib_ucontext *ucontext, -- cgit From 75e3ea5ba7d7a6c171c62c710990f2a5fb07e19a Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 5 Jul 2017 13:08:30 -0700 Subject: xen: introduce the pvcalls interface header Introduce the C header file which defines the PV Calls interface. It is imported from xen/include/public/io/pvcalls.h. Signed-off-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Reviewed-by: Juergen Gross CC: konrad.wilk@oracle.com CC: boris.ostrovsky@oracle.com CC: jgross@suse.com Signed-off-by: Boris Ostrovsky --- include/xen/interface/io/pvcalls.h | 121 +++++++++++++++++++++++++++++++++++++ include/xen/interface/io/ring.h | 2 + 2 files changed, 123 insertions(+) create mode 100644 include/xen/interface/io/pvcalls.h (limited to 'include') diff --git a/include/xen/interface/io/pvcalls.h b/include/xen/interface/io/pvcalls.h new file mode 100644 index 000000000000..ccf97b817e72 --- /dev/null +++ b/include/xen/interface/io/pvcalls.h @@ -0,0 +1,121 @@ +#ifndef __XEN_PUBLIC_IO_XEN_PVCALLS_H__ +#define __XEN_PUBLIC_IO_XEN_PVCALLS_H__ + +#include +#include +#include + +/* "1" means socket, connect, release, bind, listen, accept and poll */ +#define XENBUS_FUNCTIONS_CALLS "1" + +/* + * See docs/misc/pvcalls.markdown in xen.git for the full specification: + * https://xenbits.xen.org/docs/unstable/misc/pvcalls.html + */ +struct pvcalls_data_intf { + RING_IDX in_cons, in_prod, in_error; + + uint8_t pad1[52]; + + RING_IDX out_cons, out_prod, out_error; + + uint8_t pad2[52]; + + RING_IDX ring_order; + grant_ref_t ref[]; +}; +DEFINE_XEN_FLEX_RING(pvcalls); + +#define PVCALLS_SOCKET 0 +#define PVCALLS_CONNECT 1 +#define PVCALLS_RELEASE 2 +#define PVCALLS_BIND 3 +#define PVCALLS_LISTEN 4 +#define PVCALLS_ACCEPT 5 +#define PVCALLS_POLL 6 + +struct xen_pvcalls_request { + uint32_t req_id; /* private to guest, echoed in response */ + uint32_t cmd; /* command to execute */ + union { + struct xen_pvcalls_socket { + uint64_t id; + uint32_t domain; + uint32_t type; + uint32_t protocol; + } socket; + struct xen_pvcalls_connect { + uint64_t id; + uint8_t addr[28]; + uint32_t len; + uint32_t flags; + grant_ref_t ref; + uint32_t evtchn; + } connect; + struct xen_pvcalls_release { + uint64_t id; + uint8_t reuse; + } release; + struct xen_pvcalls_bind { + uint64_t id; + uint8_t addr[28]; + uint32_t len; + } bind; + struct xen_pvcalls_listen { + uint64_t id; + uint32_t backlog; + } listen; + struct xen_pvcalls_accept { + uint64_t id; + uint64_t id_new; + grant_ref_t ref; + uint32_t evtchn; + } accept; + struct xen_pvcalls_poll { + uint64_t id; + } poll; + /* dummy member to force sizeof(struct xen_pvcalls_request) + * to match across archs */ + struct xen_pvcalls_dummy { + uint8_t dummy[56]; + } dummy; + } u; +}; + +struct xen_pvcalls_response { + uint32_t req_id; + uint32_t cmd; + int32_t ret; + uint32_t pad; + union { + struct _xen_pvcalls_socket { + uint64_t id; + } socket; + struct _xen_pvcalls_connect { + uint64_t id; + } connect; + struct _xen_pvcalls_release { + uint64_t id; + } release; + struct _xen_pvcalls_bind { + uint64_t id; + } bind; + struct _xen_pvcalls_listen { + uint64_t id; + } listen; + struct _xen_pvcalls_accept { + uint64_t id; + } accept; + struct _xen_pvcalls_poll { + uint64_t id; + } poll; + struct _xen_pvcalls_dummy { + uint8_t dummy[8]; + } dummy; + } u; +}; + +DEFINE_RING_TYPES(xen_pvcalls, struct xen_pvcalls_request, + struct xen_pvcalls_response); + +#endif diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index c79456855539..e547088ceb0e 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -9,6 +9,8 @@ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +#include + typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -- cgit From b4feaeb036b2e0f715e0d05ae84da01c6696a75f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 27 Jul 2017 17:11:58 +0200 Subject: xen: cleanup xen.h The macros for testing domain types are more complicated then they need to. Simplify them. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- include/xen/xen.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index 6e8b7fc79801..28c59ca529d7 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -13,11 +13,16 @@ extern enum xen_domain_type xen_domain_type; #define xen_domain_type XEN_NATIVE #endif +#ifdef CONFIG_XEN_PVH +extern bool xen_pvh; +#else +#define xen_pvh 0 +#endif + #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) +#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) +#define xen_pvh_domain() (xen_pvh) #ifdef CONFIG_XEN_DOM0 #include @@ -29,11 +34,4 @@ extern enum xen_domain_type xen_domain_type; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ -#ifdef CONFIG_XEN_PVH -extern bool xen_pvh; -#define xen_pvh_domain() (xen_hvm_domain() && xen_pvh) -#else -#define xen_pvh_domain() (0) -#endif - #endif /* _XEN_XEN_H */ -- cgit From 882bbe56aed0cbb00b374454f6c069919c9228dd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 4 Aug 2017 13:36:12 +0200 Subject: xen: remove unused function xen_set_domain_pte() The function xen_set_domain_pte() is used nowhere in the kernel. Remove it. Signed-off-by: Juergen Gross Acked-by: Steven Rostedt (VMware) Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- include/trace/events/xen.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index b70a38b7fa84..677e8ac2bb81 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -149,24 +149,6 @@ DECLARE_EVENT_CLASS(xen_mmu__set_pte, DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte); DEFINE_XEN_MMU_SET_PTE(xen_mmu_set_pte_atomic); -TRACE_EVENT(xen_mmu_set_domain_pte, - TP_PROTO(pte_t *ptep, pte_t pteval, unsigned domid), - TP_ARGS(ptep, pteval, domid), - TP_STRUCT__entry( - __field(pte_t *, ptep) - __field(pteval_t, pteval) - __field(unsigned, domid) - ), - TP_fast_assign(__entry->ptep = ptep; - __entry->pteval = pteval.pte; - __entry->domid = domid), - TP_printk("ptep %p pteval %0*llx (raw %0*llx) domid %u", - __entry->ptep, - (int)sizeof(pteval_t) * 2, (unsigned long long)pte_val(native_make_pte(__entry->pteval)), - (int)sizeof(pteval_t) * 2, (unsigned long long)__entry->pteval, - __entry->domid) - ); - TRACE_EVENT(xen_mmu_set_pte_at, TP_PROTO(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval), -- cgit From a0e4fd14ba0b90b6fcea9c0f7f6de6886479427d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 4 Aug 2017 13:36:13 +0200 Subject: xen: remove not used trace functions There are some Xen specific trace functions defined in include/trace/events/xen.h. Remove them. Signed-off-by: Juergen Gross Acked-by: Steven Rostedt (VMware) Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- include/trace/events/xen.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 677e8ac2bb81..1b4fed72f573 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -248,16 +248,6 @@ TRACE_EVENT(xen_mmu_set_p4d, (int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)), (int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval) ); - -TRACE_EVENT(xen_mmu_pud_clear, - TP_PROTO(pud_t *pudp), - TP_ARGS(pudp), - TP_STRUCT__entry( - __field(pud_t *, pudp) - ), - TP_fast_assign(__entry->pudp = pudp), - TP_printk("pudp %p", __entry->pudp) - ); #else TRACE_EVENT(xen_mmu_set_pud, @@ -277,16 +267,6 @@ TRACE_EVENT(xen_mmu_set_pud, #endif -TRACE_EVENT(xen_mmu_pgd_clear, - TP_PROTO(pgd_t *pgdp), - TP_ARGS(pgdp), - TP_STRUCT__entry( - __field(pgd_t *, pgdp) - ), - TP_fast_assign(__entry->pgdp = pgdp), - TP_printk("pgdp %p", __entry->pgdp) - ); - DECLARE_EVENT_CLASS(xen_mmu_ptep_modify_prot, TP_PROTO(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval), -- cgit From de29faa0d8ac925534749bc56d539bf936ce122b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Dec 2016 19:25:00 +0000 Subject: irqchip/gic-v4: Add management structure definitions Add a bunch of GICv4-specific data structures that will get used in subsequent patches. Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 92 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 include/linux/irqchip/arm-gic-v4.h (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h new file mode 100644 index 000000000000..d499538dd86f --- /dev/null +++ b/include/linux/irqchip/arm-gic-v4.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016,2017 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __LINUX_IRQCHIP_ARM_GIC_V4_H +#define __LINUX_IRQCHIP_ARM_GIC_V4_H + +struct its_vpe; + +/* Embedded in kvm.arch */ +struct its_vm { + struct fwnode_handle *fwnode; + struct irq_domain *domain; + struct page *vprop_page; + struct its_vpe **vpes; + int nr_vpes; + irq_hw_number_t db_lpi_base; + unsigned long *db_bitmap; + int nr_db_lpis; +}; + +/* Embedded in kvm_vcpu.arch */ +struct its_vpe { + struct page *vpt_page; + struct its_vm *its_vm; + /* Doorbell interrupt */ + int irq; + irq_hw_number_t vpe_db_lpi; + /* + * This collection ID is used to indirect the target + * redistributor for this VPE. The ID itself isn't involved in + * programming of the ITS. + */ + u16 col_idx; + /* Unique (system-wide) VPE identifier */ + u16 vpe_id; + /* Implementation Defined Area Invalid */ + bool idai; + /* Pending VLPIs on schedule out? */ + bool pending_last; +}; + +/* + * struct its_vlpi_map: structure describing the mapping of a + * VLPI. Only to be interpreted in the context of a physical interrupt + * it complements. To be used as the vcpu_info passed to + * irq_set_vcpu_affinity(). + * + * @vm: Pointer to the GICv4 notion of a VM + * @vpe: Pointer to the GICv4 notion of a virtual CPU (VPE) + * @vintid: Virtual LPI number + * @db_enabled: Is the VPE doorbell to be generated? + */ +struct its_vlpi_map { + struct its_vm *vm; + struct its_vpe *vpe; + u32 vintid; + bool db_enabled; +}; + +enum its_vcpu_info_cmd_type { + MAP_VLPI, + GET_VLPI, + PROP_UPDATE_VLPI, + PROP_UPDATE_AND_INV_VLPI, + SCHEDULE_VPE, + DESCHEDULE_VPE, + INVALL_VPE, +}; + +struct its_cmd_info { + enum its_vcpu_info_cmd_type cmd_type; + union { + struct its_vlpi_map *map; + u8 config; + }; +}; + +#endif -- cgit From d7276b80e752acaaf30f04e62d0e986b5bced2df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:11:47 +0000 Subject: irqchip/gic-v3-its: Add GICv4 ITS command definitions Add the new GICv4 ITS command definitions, most of them, being defined in terms of their physical counterparts. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index af8c55105fc2..7c6fd8f3e36c 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -347,6 +347,18 @@ #define GITS_CMD_CLEAR 0x04 #define GITS_CMD_SYNC 0x05 +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP is the odd one, as it doesn't have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) + /* * ITS error numbers */ -- cgit From 3ca63f363f3f8fe457482c53d5c86d83bff21e64 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jan 2017 13:39:52 +0000 Subject: irqchip/gic-v3-its: Add VPENDBASER/VPROPBASER accessors V{PEND,PROP}BASER being 64bit registers, they need some ad-hoc accessors on 32bit, specially given that VPENDBASER contains a Valid bit, making the access a bit convoluted. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 7c6fd8f3e36c..17ba0d732f12 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -212,6 +212,11 @@ #define LPI_PROP_GROUP1 (1 << 1) #define LPI_PROP_ENABLED (1 << 0) +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + /* * ITS registers, offsets from ITS_base */ -- cgit From e643d80340363c9d172abfbe437537196cfc1643 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:09:31 +0000 Subject: irqchip/gic-v3-its: Add VPE scheduling When a VPE is scheduled to run, the corresponding redistributor must be told so, by setting VPROPBASER to the VM's property table, and VPENDBASER to the vcpu's pending table. When scheduled out, we preserve the IDAI and PendingLast bits. The latter is specially important, as it tells the hypervisor that there are pending interrupts for this vcpu. Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 58 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 17ba0d732f12..6bc142cfa616 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -212,6 +212,64 @@ #define LPI_PROP_GROUP1 (1 << 1) #define LPI_PROP_ENABLED (1 << 0) +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + #define GICR_VPENDBASER_Dirty (1ULL << 60) #define GICR_VPENDBASER_PendingLast (1ULL << 61) #define GICR_VPENDBASER_IDAI (1ULL << 62) -- cgit From 20b3d54ecba51c5fe476eea94ffdc463559c5c85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:23:22 +0000 Subject: irqchip/gic-v3-its: Add device proxy for VPE management if !DirectLpi When we don't have the DirectLPI feature, we must work around the architecture shortcomings to be able to perform the required maintenance (interrupt masking, clearing and injection). For this, we create a fake device whose sole purpose is to provide a way to issue commands as if we were dealing with LPIs coming from that device (while they actually originate from the ITS). This fake device doesn't have LPIs allocated to it, but instead uses the VPE LPIs. Of course, this could be a real bottleneck, and a naive implementation would require 6 commands to issue an invalidation. Instead, let's allocate at least one event per physical CPU (rounded up to the next power of 2), and opportunistically map the VPE doorbell to an event. This doorbell will be mapped until we roll over and need to reallocate this slot. This ensures that most of the time, we only need 2 commands to issue an INV, INT or CLEAR, making the performance a lot better, given that we always issue a CLEAR on entry, and an INV on each side of a trapped WFI. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index d499538dd86f..e7a93ad4fe97 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { /* Doorbell interrupt */ int irq; irq_hw_number_t vpe_db_lpi; + /* VPE proxy mapping */ + int vpe_proxy_event; /* * This collection ID is used to indirect the target * redistributor for this VPE. The ID itself isn't involved in -- cgit From d51c4b4da7f8fae8c884e3b89fdab906f66da28a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Jun 2017 21:24:25 +0100 Subject: irqchip/gic-v3-its: Set implementation defined bit to enable VLPIs A long time ago, GITS_CTLR[1] used to be called GITC_CTLR.EnableVLPI. It has been subsequently deprecated and is now an "Implementation Defined" bit that may ot may not be set for GICv4. Brilliant. And the current crop of the FastModel requires that bit for VLPIs to be enabled. Oh well... Let's set it and find out what breaks. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6bc142cfa616..1ea576c8126f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -298,6 +298,7 @@ #define GITS_TRANSLATER 0x10040 #define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) #define GITS_CTLR_ITS_NUMBER_SHIFT 4 #define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) #define GITS_CTLR_QUIESCENT (1U << 31) -- cgit From 7de5c0af9c7c717f9052e6d75b24f90050e6a56e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:27:52 +0000 Subject: irqchip/gic-v4: Add per-VM VPE domain creation When creating a VM, it is very convenient to have an irq domain containing all the doorbell interrupts associated with that VM (each interrupt representing a VPE). Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e7a93ad4fe97..3dc811dc53da 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -91,4 +91,7 @@ struct its_cmd_info { }; }; +int its_alloc_vcpu_irqs(struct its_vm *vm); +void its_free_vcpu_irqs(struct its_vm *vm); + #endif -- cgit From eab84318c2811e3f38c080efcc7f709f51bb8370 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:31:02 +0000 Subject: irqchip/gic-v4: Add VPE command interface Add the required interfaces to schedule a VPE and perform a VINVALL command. Reviewed-by: Thomas Gleixner Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 3dc811dc53da..6450f3ed101f 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -93,5 +93,7 @@ struct its_cmd_info { int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); +int its_schedule_vpe(struct its_vpe *vpe, bool on); +int its_invall_vpe(struct its_vpe *vpe); #endif -- cgit From f2eac75de435871d5a497f8b557874a2a8a7b264 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Dec 2016 21:50:32 +0000 Subject: irqchip/gic-v4: Add VLPI configuration interface Add the required interfaces to map, unmap and update a VLPI. Reviewed-by: Eric Auger Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6450f3ed101f..e22f878ad017 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -95,5 +95,9 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_schedule_vpe(struct its_vpe *vpe, bool on); int its_invall_vpe(struct its_vpe *vpe); +int its_map_vlpi(int irq, struct its_vlpi_map *map); +int its_get_vlpi(int irq, struct its_vlpi_map *map); +int its_unmap_vlpi(int irq); +int its_prop_update_vlpi(int irq, u8 config, bool inv); #endif -- cgit From 3d63cb53e221d8ab347e94aeac0b5511857beb7f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:31:54 +0000 Subject: irqchip/gic-v4: Enable low-level GICv4 operations Get the show on the road... Reviewed-by: Thomas Gleixner Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e22f878ad017..58a4d89aa82c 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -100,4 +100,6 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map); int its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); +int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops); + #endif -- cgit From 4bdf502517288662d883fbaa915874790f51a2cd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Jun 2017 14:10:46 +0100 Subject: irqchip/gic-v3: Advertise GICv4 support to KVM As KVM needs to know about the availability of GICv4 to enable direct injection of interrupts, let's advertise the feature in the gic_kvm_info structure. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-common.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index c647b0547bcd..0a83b4379f34 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -27,6 +27,8 @@ struct gic_kvm_info { unsigned int maint_irq; /* Virtual control interface */ struct resource vctrl; + /* vlpi support */ + bool has_v4; }; const struct gic_kvm_info *gic_get_kvm_info(void); -- cgit From b5f515735bea4ae71c248aea3e049073f8852889 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Thu, 31 Aug 2017 11:09:45 -0400 Subject: ext4: avoid Y2038 overflow in recently_deleted() Avoid a 32-bit time overflow in recently_deleted() since i_dtime (inode deletion time) is stored only as a 32-bit value on disk. Since i_dtime isn't used for much beyond a boolean value in e2fsck and is otherwise only used in this function in the kernel, there is no benefit to use more space in the inode for this field on disk. Instead, compare only the relative deletion time with the low 32 bits of the time using the newly-added time_before32() helper, which is similar to time_before() and time_after() for jiffies. Increase RECENTCY_DIRTY to 300s based on Ted's comments about usage experience at Google. Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o Reviewed-by: Arnd Bergmann --- include/linux/time.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 4abb32d4c6b8..3877136bbdf8 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -285,4 +285,19 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) return true; } +/** + * time_after32 - compare two 32-bit relative times + * @a: the time which may be after @b + * @b: the time which may be before @a + * + * time_after32(a, b) returns true if the time @a is after time @b. + * time_before32(b, a) returns true if the time @b is before time @a. + * + * Similar to time_after(), compare two 32-bit timestamps for relative + * times. This is useful for comparing 32-bit seconds values that can't + * be converted to 64-bit values (e.g. due to disk format or wire protocol + * issues) when it is known that the times are less than 68 years apart. + */ +#define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) +#define time_before32(b, a) time_after32(a, b) #endif -- cgit From a27bfcab5c1c3a0df61b68e85fc5e4cade83559f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 31 Aug 2017 09:47:22 -0600 Subject: genalloc: Fix an incorrect kerneldoc comment The kerneldoc comment for the genpool_algo_t typedef was incomplete and incorrectly formatted, leading to a raft of warnings during the docs build. Fix it appropriately. Signed-off-by: Jonathan Corbet --- include/linux/genalloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 29d4385903d4..6dfec4d638df 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -38,12 +38,13 @@ struct device_node; struct gen_pool; /** - * Allocation callback function type definition + * typedef genpool_algo_t: Allocation callback function type definition * @map: Pointer to bitmap * @size: The bitmap size in bits * @start: The bitnumber to start searching at * @nr: The number of zeroed bits we're looking for - * @data: optional additional data used by @genpool_algo_t + * @data: optional additional data used by the callback + * @pool: the pool being allocated from */ typedef unsigned long (*genpool_algo_t)(unsigned long *map, unsigned long size, -- cgit From eb3c74de28b24f5a36d12d6c84f1fceb25d12c4f Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 28 Aug 2017 19:02:41 +0800 Subject: usb: phy: Avoid unchecked dereference warning Move the USB phy NULL checking before issuing usb_phy_set_charger_current() to avoid unchecked dereference warning. Signed-off-by: Baolin Wang Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/phy.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index de881b171ba9..8c6914873a16 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -322,9 +322,12 @@ static inline void usb_phy_set_charger_state(struct usb_phy *usb_phy, static inline int usb_phy_set_power(struct usb_phy *x, unsigned mA) { + if (!x) + return 0; + usb_phy_set_charger_current(x, mA); - if (x && x->set_power) + if (x->set_power) return x->set_power(x, mA); return 0; } -- cgit From 2eb7954809bf26de27bc3a2fea4eef606bbf4482 Mon Sep 17 00:00:00 2001 From: Jaghathiswari Rankappagounder Natarajan Date: Wed, 30 Aug 2017 16:34:33 -0700 Subject: drivers: w1: add hwmon support structures This patch has changes to w1.h/w1.c generic files to add (optional) hwmon support structures. Signed-off-by: Jaghathiswari Rankappagounder Natarajan Acked-by: Evgeniy Polyakov Acked-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/w1.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/w1.h b/include/linux/w1.h index 90cbe7e65059..5b2972946dda 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -68,6 +68,7 @@ struct w1_reg_num { * @family: module for device family type * @family_data: pointer for use by the family module * @dev: kernel device identifier + * @hwmon: pointer to hwmon device * */ struct w1_slave { @@ -83,6 +84,7 @@ struct w1_slave { struct w1_family *family; void *family_data; struct device dev; + struct device *hwmon; }; typedef void (*w1_slave_found_callback)(struct w1_master *, u64); @@ -250,11 +252,13 @@ void w1_remove_master_device(struct w1_bus_master *master); * @add_slave: add_slave * @remove_slave: remove_slave * @groups: sysfs group + * @chip_info: pointer to struct hwmon_chip_info */ struct w1_family_ops { int (*add_slave)(struct w1_slave *sl); void (*remove_slave)(struct w1_slave *sl); const struct attribute_group **groups; + const struct hwmon_chip_info *chip_info; }; /** -- cgit From e1eb899b45781b9bb77e6d7772d6e67bb0bc1a18 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 25 Aug 2017 09:14:43 +0200 Subject: drm/amdgpu: add IOCTL interface for per VM BOs v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the IOCTL interface so that applications can allocate per VM BOs. Still WIP since not all corner cases are tested yet, but this reduces average CS overhead for 10K BOs from 21ms down to 48us. v2: add some extra checks, remove the WIP tag v3: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 7b8fa11c2285..e055776f2f4c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -87,6 +87,8 @@ extern "C" { #define AMDGPU_GEM_CREATE_SHADOW (1 << 4) /* Flag that allocating the BO should use linear VRAM */ #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) struct drm_amdgpu_gem_create_in { /** the requested memory size */ -- cgit From 07d79fc7d94e3f884b8b1c95aa615b202bb5e4c1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 30 Aug 2017 14:30:36 -0700 Subject: net_sched: add reverse binding for tc class TC filters when used as classifiers are bound to TC classes. However, there is a hidden difference when adding them in different orders: 1. If we add tc classes before its filters, everything is fine. Logically, the classes exist before we specify their ID's in filters, it is easy to bind them together, just as in the current code base. 2. If we add tc filters before the tc classes they bind, we have to do dynamic lookup in fast path. What's worse, this happens all the time not just once, because on fast path tcf_result is passed on stack, there is no way to propagate back to the one in tc filters. This hidden difference hurts performance silently if we have many tc classes in hierarchy. This patch intends to close this gap by doing the reverse binding when we create a new class, in this case we can actually search all the filters in its parent, match and fixup by classid. And because tcf_result is specific to each type of tc filter, we have to introduce a new ops for each filter to tell how to bind the class. Note, we still can NOT totally get rid of those class lookup in ->enqueue() because cgroup and flow filters have no way to determine the classid at setup time, they still have to go through dynamic lookup. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c30b634c5f82..d6247a3c40df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,7 @@ struct tcf_proto_ops { void **, bool); int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*bind_class)(void *, u32, unsigned long); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, -- cgit From e3cfddd577e763496c25ddb855cfff48b1174b63 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 30 Aug 2017 22:18:13 -0700 Subject: bridge: add tracepoint in br_fdb_update This extends bridge fdb table tracepoints to also cover learned fdb entries in the br_fdb_update path. Note that unlike other tracepoints I have moved this to when the fdb is modified because this is in the datapath and can generate a lot of noise in the trace output. br_fdb_update is also called from added_by_user context in the NTF_USE case which is already traced ..hence the !added_by_user check. Signed-off-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/trace/events/bridge.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 0f1cde00af11..1bee3e7fdf32 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete, __entry->addr[4], __entry->addr[5], __entry->vid) ); +TRACE_EVENT(br_fdb_update, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, bool added_by_user), + + TP_ARGS(br, source, addr, vid, added_by_user), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, source->dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(bool, added_by_user) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, source->dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->added_by_user = added_by_user; + ), + + TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid, + __entry->added_by_user) +); + + #endif /* _TRACE_BRIDGE_H */ /* This part must be outside protection */ -- cgit From f16ded5948a5e6a399161bd71e866b198952ea5f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 31 Aug 2017 13:41:40 +0300 Subject: net: fix two typos in net_device_ops documentation. This patch fixes two trivial typos in net_device_ops documentation, related to ndo_xdp_flush callback. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29bf06ff157c..35de8312e0b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1124,8 +1124,8 @@ struct xfrmdev_ops { * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev); - * This function is used to inform the driver to flush a paticular - * xpd tx queue. Must be called on same CPU as xdp_xmit. + * This function is used to inform the driver to flush a particular + * xdp tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); -- cgit From 8fc614c0ae5cb5df11d6aa9559e63baacf20a840 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 31 Aug 2017 14:12:39 -0500 Subject: PCI/AER: Reformat AER register definitions Reformat so comments fit on same line as definition. No functional change intended. Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index c22d3ebaca20..46632aaee1c0 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -733,23 +733,17 @@ #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ #define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ #define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -/* Correctable Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 -/* Non-fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 -/* Fatal Err Reporting Enable */ -#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 +#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ +#define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ #define PCI_ERR_ROOT_STATUS 48 -#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ -/* Multi ERR_COR Received */ -#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 -/* ERR_FATAL/NONFATAL Received */ -#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 -/* Multi ERR_FATAL/NONFATAL Received */ -#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 -#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First Fatal */ -#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ -#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ +#define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ +#define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ +#define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +#define PCI_ERR_ROOT_MULTI_UNCOR_RCV 0x00000008 /* Multiple FATAL/NONFATAL */ +#define PCI_ERR_ROOT_FIRST_FATAL 0x00000010 /* First UNC is Fatal */ +#define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ +#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ #define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ /* Virtual Channel */ -- cgit From 60361e12d01676e23a8de89a5ef4a349ae97f616 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Wed, 30 Aug 2017 05:36:38 -0500 Subject: ftrace: Fix debug preempt config name in stack_tracer_{en,dis}able stack_tracer_disable()/stack_tracer_enable() had been using the wrong name for the config symbol to enable their preempt-debugging checks -- fix with a word swap. Link: http://lkml.kernel.org/r/20170831154036.4xldyakmmhuts5x7@hatter.bewilderbeest.net Cc: stable@vger.kernel.org Fixes: 8aaf1ee70e ("tracing: Rename trace_active to disable_stack_tracer and inline its modification") Signed-off-by: Zev Weiss Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6383115e9d2c..2e028854bac7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -307,7 +307,7 @@ DECLARE_PER_CPU(int, disable_stack_tracer); static inline void stack_tracer_disable(void) { /* Preemption or interupts must be disabled */ - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_inc(disable_stack_tracer); } @@ -320,7 +320,7 @@ static inline void stack_tracer_disable(void) */ static inline void stack_tracer_enable(void) { - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); this_cpu_dec(disable_stack_tracer); } -- cgit From 89e4fdecb51cf5535867026274bc97de9480ade5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 24 Aug 2017 00:03:43 -0700 Subject: loop: add ioctl for changing logical block size This is a different approach from the first attempt in f2c6df7dbf9a ("loop: support 4k physical blocksize"). Rather than extending LOOP_{GET,SET}_STATUS, add a separate ioctl just for setting the block size. Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/uapi/linux/loop.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index c8125ec1f4f2..23158dbe2424 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -88,6 +88,7 @@ struct loop_info64 { #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 #define LOOP_SET_DIRECT_IO 0x4C08 +#define LOOP_SET_BLOCK_SIZE 0x4C09 /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 -- cgit From 4f59c718521a0f00b6589da6b8fcea2dc296026d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Jul 2017 11:40:39 -0400 Subject: teach SYSCALL_DEFINE/COMPAT_SYSCALL_DEFINE to handle __bitwise arguments Signed-off-by: Al Viro --- include/linux/compat.h | 2 +- include/linux/syscalls.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 5a6a109b4a50..e5d3fbe24f7d 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,7 +27,7 @@ #endif #ifndef __SC_DELOUSE -#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) +#define __SC_DELOUSE(t,v) ((__force t)(unsigned long)(v)) #endif #define COMPAT_SYSCALL_DEFINE0(name) \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3cb15ea48aee..0bc1d2e8cc17 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -100,11 +100,12 @@ union bpf_attr; #define __MAP(n,...) __MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a -#define __TYPE_IS_L(t) (__same_type((t)0, 0L)) -#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL)) -#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) +#define __TYPE_AS(t, v) __same_type((__force t)0, v) +#define __TYPE_IS_L(t) (__TYPE_AS(t, 0L)) +#define __TYPE_IS_UL(t) (__TYPE_AS(t, 0UL)) +#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a -#define __SC_CAST(t, a) (t) a +#define __SC_CAST(t, a) (__force t) a #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long)) -- cgit From ddef7ed2b5cbafae692d1d580bb5a07808926a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jul 2017 18:58:37 +0200 Subject: annotate RWF_... flags [AV: added missing annotations in syscalls.h/compat.h] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/compat.h | 16 ++++++++++++++-- include/linux/fs.h | 12 +++++++----- include/linux/syscalls.h | 4 ++-- include/uapi/linux/aio_abi.h | 21 +++++++++++---------- include/uapi/linux/fs.h | 28 ++++++++++++++++++++-------- 5 files changed, 54 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index e5d3fbe24f7d..3fc433303d7a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -365,10 +365,10 @@ asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, compat_ulong_t vlen, u32 pos_low, u32 pos_high); asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags); + compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags); #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 asmlinkage long compat_sys_preadv64(unsigned long fd, @@ -382,6 +382,18 @@ asmlinkage long compat_sys_pwritev64(unsigned long fd, unsigned long vlen, loff_t pos); #endif +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +asmlinkage long compat_sys_readv64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +asmlinkage long compat_sys_pwritev64v2(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos, rwf_t flags); +#endif + asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, diff --git a/include/linux/fs.h b/include/linux/fs.h index cbfe127bccf8..2625fc47c7e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -72,6 +72,8 @@ extern int leases_enable, lease_break_time; extern int sysctl_protected_symlinks; extern int sysctl_protected_hardlinks; +typedef __kernel_rwf_t rwf_t; + struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); @@ -1758,9 +1760,9 @@ extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *) extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, - unsigned long, loff_t *, int); + unsigned long, loff_t *, rwf_t); extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *, int); + unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, @@ -2874,9 +2876,9 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags); + rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags); + rwf_t flags); /* fs/block_dev.c */ extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); @@ -3143,7 +3145,7 @@ static inline int iocb_flags(struct file *file) return res; } -static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0bc1d2e8cc17..138c94535864 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -579,12 +579,12 @@ asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index a2d4a8ac94ca..a04adbc70ddf 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -28,6 +28,7 @@ #define __LINUX__AIO_ABI_H #include +#include #include typedef __kernel_ulong_t aio_context_t; @@ -62,14 +63,6 @@ struct io_event { __s64 res2; /* secondary result */ }; -#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) -#define PADDED(x,y) x, y -#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) -#define PADDED(x,y) y, x -#else -#error edit for your odd byteorder. -#endif - /* * we always use a 64bit off_t when communicating * with userland. its up to libraries to do the @@ -79,8 +72,16 @@ struct io_event { struct iocb { /* these are internal to the kernel/libc. */ __u64 aio_data; /* data to be returned in event's data */ - __u32 PADDED(aio_key, aio_rw_flags); - /* the kernel sets aio_key to the req # */ + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + __u32 aio_key; /* the kernel sets aio_key to the req # */ + __kernel_rwf_t aio_rw_flags; /* RWF_* flags */ +#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) + __kernel_rwf_t aio_rw_flags; /* RWF_* flags */ + __u32 aio_key; /* the kernel sets aio_key to the req # */ +#else +#error edit for your odd byteorder. +#endif /* common fields */ __u16 aio_lio_opcode; /* see IOCB_CMD_ above */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index b7495d05e8de..56235dddea7d 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -358,13 +358,25 @@ struct fscrypt_key { #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 -/* flags for preadv2/pwritev2: */ -#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ -#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ -#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ -#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */ - -#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\ - RWF_NOWAIT) +/* + * Flags for preadv2/pwritev2: + */ + +typedef int __bitwise __kernel_rwf_t; + +/* high priority request, poll if possible */ +#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) + +/* per-IO O_DSYNC */ +#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) + +/* per-IO O_SYNC */ +#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) + +/* per-IO, return -EAGAIN if operation would block */ +#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) + +/* mask of flags supported by the kernel */ +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT) #endif /* _UAPI_LINUX_FS_H */ -- cgit From f58e76c1c551c7577b25a6fe493d82f5214331b7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Aug 2017 15:29:10 -0700 Subject: : Fix copy_in_user() declaration copy_in_user() copies data from user-space address @from to user- space address @to. Hence declare both @from and @to as user-space pointers. Fixes: commit d597580d3737 ("generic ...copy_..._user primitives") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Al Viro --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index acdd6f915a8d..20ef8e6ec2db 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -156,7 +156,7 @@ copy_to_user(void __user *to, const void *from, unsigned long n) } #ifdef CONFIG_COMPAT static __always_inline unsigned long __must_check -copy_in_user(void __user *to, const void *from, unsigned long n) +copy_in_user(void __user *to, const void __user *from, unsigned long n) { might_fault(); if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n)) -- cgit From 1797f5b3cf0b3a73c42b89f7a8fd897417373730 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Thu, 31 Aug 2017 17:59:12 +0200 Subject: devlink: Add IPv6 header for dpipe This will be used by the IPv6 host table which will be introduced in the following patches. The fields in the header are added per-use. This header is global and can be reused by many drivers. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index aaf7178127a2..b9654e133599 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; #else diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6c172548589d..0cbca96c66b9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id { DEVLINK_DPIPE_FIELD_IPV4_DST_IP, }; +enum devlink_dpipe_field_ipv6_id { + DEVLINK_DPIPE_FIELD_IPV6_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, DEVLINK_DPIPE_HEADER_IPV4, + DEVLINK_DPIPE_HEADER_IPV6, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ -- cgit From 065e63f951432068ba89a844fcbff68ea16ee186 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 31 Aug 2017 17:03:47 -0400 Subject: tracing: Only have rmmod clear buffers that its events were active in Currently, when a module event is enabled, when that module is removed, it clears all ring buffers. This is to prevent another module from being loaded and having one of its trace event IDs from reusing a trace event ID of the removed module. This could cause undesirable effects as the trace event of the new module would be using its own processing algorithms to process raw data of another event. To prevent this, when a module is loaded, if any of its events have been used (signified by the WAS_ENABLED event call flag, which is never cleared), all ring buffers are cleared, just in case any one of them contains event data of the removed event. The problem is, there's no reason to clear all ring buffers if only one (or less than all of them) uses one of the events. Instead, only clear the ring buffers that recorded the events of a module that is being removed. To do this, instead of keeping the WAS_ENABLED flag with the trace event call, move it to the per instance (per ring buffer) event file descriptor. The event file descriptor maps each event to a separate ring buffer instance. Then when the module is removed, only the ring buffers that activated one of the module's events get cleared. The rest are not touched. Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 536c80ff7ad9..3702b9cb5dc8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -217,7 +217,6 @@ enum { TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, - TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, @@ -229,9 +228,6 @@ enum { * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file - * WAS_ENABLED - Set and stays set when an event was ever enabled - * (used for module unloading, if a module event is enabled, - * it is best to clear the buffers that used it). * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe @@ -241,7 +237,6 @@ enum { TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), - TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), @@ -306,6 +301,7 @@ enum { EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, EVENT_FILE_FL_PID_FILTER_BIT, + EVENT_FILE_FL_WAS_ENABLED_BIT, }; /* @@ -321,6 +317,7 @@ enum { * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter * PID_FILTER - When set, the event is filtered based on pid + * WAS_ENABLED - Set when enabled to know to clear trace on module removal */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -333,6 +330,7 @@ enum { EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), + EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT), }; struct trace_event_file { -- cgit From 5deb67f77a266010e2c10fb124b7516d0d258ce8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 31 Aug 2017 12:27:09 +0100 Subject: libnvdimm, nd_blk: remove mmio_flush_range() mmio_flush_range() suffers from a lack of clearly-defined semantics, and is somewhat ambiguous to port to other architectures where the scope of the writeback implied by "flush" and ordering might matter, but MMIO would tend to imply non-cacheable anyway. Per the rationale in 67a3e8fe9015 ("nd_blk: change aperture mapping from WC to WB"), the only existing use is actually to invalidate clean cache lines for ARCH_MEMREMAP_PMEM type mappings *without* writeback. Since the recent cleanup of the pmem API, that also now happens to be the exact purpose of arch_invalidate_pmem(), which would be a far more well-defined tool for the job. Rather than risk potentially inconsistent implementations of mmio_flush_range() for the sake of one callsite, streamline things by removing it entirely and instead move the ARCH_MEMREMAP_PMEM related definitions up to the libnvdimm level, so they can be shared by NFIT as well. This allows NFIT to be enabled for arm64. Signed-off-by: Robin Murphy Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 9b8d81a7b80e..3eaad2fbf284 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -174,4 +174,19 @@ u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); + +#ifdef CONFIG_ARCH_HAS_PMEM_API +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB +void arch_wb_cache_pmem(void *addr, size_t size); +void arch_invalidate_pmem(void *addr, size_t size); +#else +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT +static inline void arch_wb_cache_pmem(void *addr, size_t size) +{ +} +static inline void arch_invalidate_pmem(void *addr, size_t size) +{ +} +#endif + #endif /* __LIBNVDIMM_H__ */ -- cgit From a4d1a885251382250ec315482bdd8ca52dd61e6a Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 31 Aug 2017 17:17:26 -0400 Subject: dax: update to new mmu_notifier semantic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all mmu_notifier_invalidate_page() calls by *_invalidate_range() and make sure it is bracketed by calls to *_invalidate_range_start()/end(). Note that because we can not presume the pmd value or pte value we have to assume the worst and unconditionaly report an invalidation as happening. Signed-off-by: Jérôme Glisse Cc: Dan Williams Cc: Ross Zwisler Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 46b9ac5e8569..c1f6c95f3496 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1260,6 +1260,7 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); -- cgit From 5f32b265400de723ab0db23101a75ac073bdd980 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 31 Aug 2017 17:17:38 -0400 Subject: mm/mmu_notifier: kill invalidate_page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The invalidate_page callback suffered from two pitfalls. First it used to happen after the page table lock was release and thus a new page might have setup before the call to invalidate_page() happened. This is in a weird way fixed by commit c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()") that moved the callback under the page table lock but this also broke several existing users of the mmu_notifier API that assumed they could sleep inside this callback. The second pitfall was invalidate_page() being the only callback not taking a range of address in respect to invalidation but was giving an address and a page. Lots of the callback implementers assumed this could never be THP and thus failed to invalidate the appropriate range for THP. By killing this callback we unify the mmu_notifier callback API to always take a virtual address range as input. Finally this also simplifies the end user life as there is now two clear choices: - invalidate_range_start()/end() callback (which allow you to sleep) - invalidate_range() where you can not sleep but happen right after page table update under page table lock Signed-off-by: Jérôme Glisse Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index c91b3bcd158f..7b2e31b1745a 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -94,17 +94,6 @@ struct mmu_notifier_ops { unsigned long address, pte_t pte); - /* - * Before this is invoked any secondary MMU is still ok to - * read/write to the page previously pointed to by the Linux - * pte because the page hasn't been freed yet and it won't be - * freed until this returns. If required set_page_dirty has to - * be called internally to this method. - */ - void (*invalidate_page)(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address); - /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_sem and/or the @@ -220,8 +209,6 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); -extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address); extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, @@ -268,13 +255,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, __mmu_notifier_change_pte(mm, address, pte); } -static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ - if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_page(mm, address); -} - static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { @@ -442,11 +422,6 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, { } -static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ -} - static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { -- cgit From c03567a8e8d5cf2aaca40e605c48f319dc2ead57 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Thu, 31 Aug 2017 16:15:33 -0700 Subject: include/linux/compiler.h: don't perform compiletime_assert with -O0 Commit c7acec713d14 ("kernel.h: handle pointers to arrays better in container_of()") made use of __compiletime_assert() from container_of() thus increasing the usage of this macro, allowing developers to notice type conflicts in usage of container_of() at compile time. However, the implementation of __compiletime_assert relies on compiler optimizations to report an error. This means that if a developer uses "-O0" with any code that performs container_of(), the compiler will always report an error regardless of whether there is an actual problem in the code. This patch disables compile_time_assert when optimizations are disabled to allow such code to compile with CFLAGS="-O0". Example compilation failure: ./include/linux/compiler.h:547:38: error: call to `__compiletime_assert_94' declared with attribute error: pointer type mismatch in container_of() _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ ./include/linux/compiler.h:530:4: note: in definition of macro `__compiletime_assert' prefix ## suffix(); \ ^~~~~~ ./include/linux/compiler.h:547:2: note: in expansion of macro `_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^~~~~~~~~~~~~~~~~~~ ./include/linux/build_bug.h:46:37: note: in expansion of macro `compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ ./include/linux/kernel.h:860:2: note: in expansion of macro `BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ ^~~~~~~~~~~~~~~~ [akpm@linux-foundation.org: use do{}while(0), per Michal] Link: http://lkml.kernel.org/r/20170829230114.11662-1-joe@ovn.org Fixes: c7acec713d14c6c ("kernel.h: handle pointers to arrays better in container_of()") Signed-off-by: Joe Stringer Cc: Ian Abbott Cc: Arnd Bergmann Cc: Michal Nazarewicz Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index eca8ad75e28b..043b60de041e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -517,7 +517,8 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __compiletime_error_fallback(condition) do { } while (0) #endif -#define __compiletime_assert(condition, msg, prefix, suffix) \ +#ifdef __OPTIMIZE__ +# define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ bool __cond = !(condition); \ extern void prefix ## suffix(void) __compiletime_error(msg); \ @@ -525,6 +526,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s prefix ## suffix(); \ __compiletime_error_fallback(__cond); \ } while (0) +#else +# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +#endif #define _compiletime_assert(condition, msg, prefix, suffix) \ __compiletime_assert(condition, msg, prefix, suffix) -- cgit From 0a9c869d5c568054a828a38357f30d77659e5b1e Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Mon, 21 Aug 2017 13:59:01 +0200 Subject: clk: gate: expose clk_gate_ops::is_enabled This patch exposes clk_gate_ops::is_enabled as functions that can be directly called and assigned in places like this so we don't need wrapper functions that do nothing besides forward the call. Signed-off-by: Gabriel Fernandez Suggested-by: Stephen Boyd Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1fc113fbf955..5100ec1b5d55 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -343,6 +343,7 @@ struct clk_hw *clk_hw_register_gate(struct device *dev, const char *name, u8 clk_gate_flags, spinlock_t *lock); void clk_unregister_gate(struct clk *clk); void clk_hw_unregister_gate(struct clk_hw *hw); +int clk_gate_is_enabled(struct clk_hw *hw); struct clk_div_table { unsigned int val; -- cgit From 3e4d618b0722b64c551c3f2fc4c4f9cb3558ed93 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Mon, 21 Aug 2017 13:59:02 +0200 Subject: clk: stm32h7: Add stm32h743 clock driver This patch enables clocks for STM32H743 boards. Signed-off-by: Gabriel Fernandez for MFD changes: Acked-by: Lee Jones for DT-Bindings Acked-by: Rob Herring Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/stm32h7-clks.h | 165 +++++++++++++++++++++++++++++++ include/dt-bindings/mfd/stm32h7-rcc.h | 136 +++++++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 include/dt-bindings/clock/stm32h7-clks.h create mode 100644 include/dt-bindings/mfd/stm32h7-rcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/stm32h7-clks.h b/include/dt-bindings/clock/stm32h7-clks.h new file mode 100644 index 000000000000..6637272b3242 --- /dev/null +++ b/include/dt-bindings/clock/stm32h7-clks.h @@ -0,0 +1,165 @@ +/* SYS, CORE AND BUS CLOCKS */ +#define SYS_D1CPRE 0 +#define HCLK 1 +#define PCLK1 2 +#define PCLK2 3 +#define PCLK3 4 +#define PCLK4 5 +#define HSI_DIV 6 +#define HSE_1M 7 +#define I2S_CKIN 8 +#define CK_DSI_PHY 9 +#define HSE_CK 10 +#define LSE_CK 11 +#define CSI_KER_DIV122 12 +#define RTC_CK 13 +#define CPU_SYSTICK 14 + +/* OSCILLATOR BANK */ +#define OSC_BANK 18 +#define HSI_CK 18 +#define HSI_KER_CK 19 +#define CSI_CK 20 +#define CSI_KER_CK 21 +#define RC48_CK 22 +#define LSI_CK 23 + +/* MCLOCK BANK */ +#define MCLK_BANK 28 +#define PER_CK 28 +#define PLLSRC 29 +#define SYS_CK 30 +#define TRACEIN_CK 31 + +/* ODF BANK */ +#define ODF_BANK 32 +#define PLL1_P 32 +#define PLL1_Q 33 +#define PLL1_R 34 +#define PLL2_P 35 +#define PLL2_Q 36 +#define PLL2_R 37 +#define PLL3_P 38 +#define PLL3_Q 39 +#define PLL3_R 40 + +/* MCO BANK */ +#define MCO_BANK 41 +#define MCO1 41 +#define MCO2 42 + +/* PERIF BANK */ +#define PERIF_BANK 50 +#define D1SRAM1_CK 50 +#define ITCM_CK 51 +#define DTCM2_CK 52 +#define DTCM1_CK 53 +#define FLITF_CK 54 +#define JPGDEC_CK 55 +#define DMA2D_CK 56 +#define MDMA_CK 57 +#define USB2ULPI_CK 58 +#define USB1ULPI_CK 59 +#define ETH1RX_CK 60 +#define ETH1TX_CK 61 +#define ETH1MAC_CK 62 +#define ART_CK 63 +#define DMA2_CK 64 +#define DMA1_CK 65 +#define D2SRAM3_CK 66 +#define D2SRAM2_CK 67 +#define D2SRAM1_CK 68 +#define HASH_CK 69 +#define CRYPT_CK 70 +#define CAMITF_CK 71 +#define BKPRAM_CK 72 +#define HSEM_CK 73 +#define BDMA_CK 74 +#define CRC_CK 75 +#define GPIOK_CK 76 +#define GPIOJ_CK 77 +#define GPIOI_CK 78 +#define GPIOH_CK 79 +#define GPIOG_CK 80 +#define GPIOF_CK 81 +#define GPIOE_CK 82 +#define GPIOD_CK 83 +#define GPIOC_CK 84 +#define GPIOB_CK 85 +#define GPIOA_CK 86 +#define WWDG1_CK 87 +#define DAC12_CK 88 +#define WWDG2_CK 89 +#define TIM14_CK 90 +#define TIM13_CK 91 +#define TIM12_CK 92 +#define TIM7_CK 93 +#define TIM6_CK 94 +#define TIM5_CK 95 +#define TIM4_CK 96 +#define TIM3_CK 97 +#define TIM2_CK 98 +#define MDIOS_CK 99 +#define OPAMP_CK 100 +#define CRS_CK 101 +#define TIM17_CK 102 +#define TIM16_CK 103 +#define TIM15_CK 104 +#define TIM8_CK 105 +#define TIM1_CK 106 +#define TMPSENS_CK 107 +#define RTCAPB_CK 108 +#define VREF_CK 109 +#define COMP12_CK 110 +#define SYSCFG_CK 111 + +/* KERNEL BANK */ +#define KERN_BANK 120 +#define SDMMC1_CK 120 +#define QUADSPI_CK 121 +#define FMC_CK 122 +#define USB2OTG_CK 123 +#define USB1OTG_CK 124 +#define ADC12_CK 125 +#define SDMMC2_CK 126 +#define RNG_CK 127 +#define ADC3_CK 128 +#define DSI_CK 129 +#define LTDC_CK 130 +#define USART8_CK 131 +#define USART7_CK 132 +#define HDMICEC_CK 133 +#define I2C3_CK 134 +#define I2C2_CK 135 +#define I2C1_CK 136 +#define UART5_CK 137 +#define UART4_CK 138 +#define USART3_CK 139 +#define USART2_CK 140 +#define SPDIFRX_CK 141 +#define SPI3_CK 142 +#define SPI2_CK 143 +#define LPTIM1_CK 144 +#define FDCAN_CK 145 +#define SWP_CK 146 +#define HRTIM_CK 147 +#define DFSDM1_CK 148 +#define SAI3_CK 149 +#define SAI2_CK 150 +#define SAI1_CK 151 +#define SPI5_CK 152 +#define SPI4_CK 153 +#define SPI1_CK 154 +#define USART6_CK 155 +#define USART1_CK 156 +#define SAI4B_CK 157 +#define SAI4A_CK 158 +#define LPTIM5_CK 159 +#define LPTIM4_CK 160 +#define LPTIM3_CK 161 +#define LPTIM2_CK 162 +#define I2C4_CK 163 +#define SPI6_CK 164 +#define LPUART1_CK 165 + +#define STM32H7_MAX_CLKS 166 diff --git a/include/dt-bindings/mfd/stm32h7-rcc.h b/include/dt-bindings/mfd/stm32h7-rcc.h new file mode 100644 index 000000000000..461a8e04453a --- /dev/null +++ b/include/dt-bindings/mfd/stm32h7-rcc.h @@ -0,0 +1,136 @@ +/* + * This header provides constants for the STM32H7 RCC IP + */ + +#ifndef _DT_BINDINGS_MFD_STM32H7_RCC_H +#define _DT_BINDINGS_MFD_STM32H7_RCC_H + +/* AHB3 */ +#define STM32H7_RCC_AHB3_MDMA 0 +#define STM32H7_RCC_AHB3_DMA2D 4 +#define STM32H7_RCC_AHB3_JPGDEC 5 +#define STM32H7_RCC_AHB3_FMC 12 +#define STM32H7_RCC_AHB3_QUADSPI 14 +#define STM32H7_RCC_AHB3_SDMMC1 16 +#define STM32H7_RCC_AHB3_CPU 31 + +#define STM32H7_AHB3_RESET(bit) (STM32H7_RCC_AHB3_##bit + (0x7C * 8)) + +/* AHB1 */ +#define STM32H7_RCC_AHB1_DMA1 0 +#define STM32H7_RCC_AHB1_DMA2 1 +#define STM32H7_RCC_AHB1_ADC12 5 +#define STM32H7_RCC_AHB1_ART 14 +#define STM32H7_RCC_AHB1_ETH1MAC 15 +#define STM32H7_RCC_AHB1_USB1OTG 25 +#define STM32H7_RCC_AHB1_USB2OTG 27 + +#define STM32H7_AHB1_RESET(bit) (STM32H7_RCC_AHB1_##bit + (0x80 * 8)) + +/* AHB2 */ +#define STM32H7_RCC_AHB2_CAMITF 0 +#define STM32H7_RCC_AHB2_CRYPT 4 +#define STM32H7_RCC_AHB2_HASH 5 +#define STM32H7_RCC_AHB2_RNG 6 +#define STM32H7_RCC_AHB2_SDMMC2 9 + +#define STM32H7_AHB2_RESET(bit) (STM32H7_RCC_AHB2_##bit + (0x84 * 8)) + +/* AHB4 */ +#define STM32H7_RCC_AHB4_GPIOA 0 +#define STM32H7_RCC_AHB4_GPIOB 1 +#define STM32H7_RCC_AHB4_GPIOC 2 +#define STM32H7_RCC_AHB4_GPIOD 3 +#define STM32H7_RCC_AHB4_GPIOE 4 +#define STM32H7_RCC_AHB4_GPIOF 5 +#define STM32H7_RCC_AHB4_GPIOG 6 +#define STM32H7_RCC_AHB4_GPIOH 7 +#define STM32H7_RCC_AHB4_GPIOI 8 +#define STM32H7_RCC_AHB4_GPIOJ 9 +#define STM32H7_RCC_AHB4_GPIOK 10 +#define STM32H7_RCC_AHB4_CRC 19 +#define STM32H7_RCC_AHB4_BDMA 21 +#define STM32H7_RCC_AHB4_ADC3 24 +#define STM32H7_RCC_AHB4_HSEM 25 + +#define STM32H7_AHB4_RESET(bit) (STM32H7_RCC_AHB4_##bit + (0x88 * 8)) + +/* APB3 */ +#define STM32H7_RCC_APB3_LTDC 3 +#define STM32H7_RCC_APB3_DSI 4 + +#define STM32H7_APB3_RESET(bit) (STM32H7_RCC_APB3_##bit + (0x8C * 8)) + +/* APB1L */ +#define STM32H7_RCC_APB1L_TIM2 0 +#define STM32H7_RCC_APB1L_TIM3 1 +#define STM32H7_RCC_APB1L_TIM4 2 +#define STM32H7_RCC_APB1L_TIM5 3 +#define STM32H7_RCC_APB1L_TIM6 4 +#define STM32H7_RCC_APB1L_TIM7 5 +#define STM32H7_RCC_APB1L_TIM12 6 +#define STM32H7_RCC_APB1L_TIM13 7 +#define STM32H7_RCC_APB1L_TIM14 8 +#define STM32H7_RCC_APB1L_LPTIM1 9 +#define STM32H7_RCC_APB1L_SPI2 14 +#define STM32H7_RCC_APB1L_SPI3 15 +#define STM32H7_RCC_APB1L_SPDIF_RX 16 +#define STM32H7_RCC_APB1L_USART2 17 +#define STM32H7_RCC_APB1L_USART3 18 +#define STM32H7_RCC_APB1L_UART4 19 +#define STM32H7_RCC_APB1L_UART5 20 +#define STM32H7_RCC_APB1L_I2C1 21 +#define STM32H7_RCC_APB1L_I2C2 22 +#define STM32H7_RCC_APB1L_I2C3 23 +#define STM32H7_RCC_APB1L_HDMICEC 27 +#define STM32H7_RCC_APB1L_DAC12 29 +#define STM32H7_RCC_APB1L_USART7 30 +#define STM32H7_RCC_APB1L_USART8 31 + +#define STM32H7_APB1L_RESET(bit) (STM32H7_RCC_APB1L_##bit + (0x90 * 8)) + +/* APB1H */ +#define STM32H7_RCC_APB1H_CRS 1 +#define STM32H7_RCC_APB1H_SWP 2 +#define STM32H7_RCC_APB1H_OPAMP 4 +#define STM32H7_RCC_APB1H_MDIOS 5 +#define STM32H7_RCC_APB1H_FDCAN 8 + +#define STM32H7_APB1H_RESET(bit) (STM32H7_RCC_APB1H_##bit + (0x94 * 8)) + +/* APB2 */ +#define STM32H7_RCC_APB2_TIM1 0 +#define STM32H7_RCC_APB2_TIM8 1 +#define STM32H7_RCC_APB2_USART1 4 +#define STM32H7_RCC_APB2_USART6 5 +#define STM32H7_RCC_APB2_SPI1 12 +#define STM32H7_RCC_APB2_SPI4 13 +#define STM32H7_RCC_APB2_TIM15 16 +#define STM32H7_RCC_APB2_TIM16 17 +#define STM32H7_RCC_APB2_TIM17 18 +#define STM32H7_RCC_APB2_SPI5 20 +#define STM32H7_RCC_APB2_SAI1 22 +#define STM32H7_RCC_APB2_SAI2 23 +#define STM32H7_RCC_APB2_SAI3 24 +#define STM32H7_RCC_APB2_DFSDM1 28 +#define STM32H7_RCC_APB2_HRTIM 29 + +#define STM32H7_APB2_RESET(bit) (STM32H7_RCC_APB2_##bit + (0x98 * 8)) + +/* APB4 */ +#define STM32H7_RCC_APB4_SYSCFG 1 +#define STM32H7_RCC_APB4_LPUART1 3 +#define STM32H7_RCC_APB4_SPI6 5 +#define STM32H7_RCC_APB4_I2C4 7 +#define STM32H7_RCC_APB4_LPTIM2 9 +#define STM32H7_RCC_APB4_LPTIM3 10 +#define STM32H7_RCC_APB4_LPTIM4 11 +#define STM32H7_RCC_APB4_LPTIM5 12 +#define STM32H7_RCC_APB4_COMP12 14 +#define STM32H7_RCC_APB4_VREF 15 +#define STM32H7_RCC_APB4_SAI4 21 +#define STM32H7_RCC_APB4_TMPSENS 26 + +#define STM32H7_APB4_RESET(bit) (STM32H7_RCC_APB4_##bit + (0x9C * 8)) + +#endif /* _DT_BINDINGS_MFD_STM32H7_RCC_H */ -- cgit From 64104f703212ff50e855bb2e2fa80d71db62c521 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 30 Aug 2017 16:58:39 -0700 Subject: scsi: Call scsi_initialize_rq() for filesystem requests If a pass-through request is submitted then blk_get_request() initializes that request by calling scsi_initialize_rq(). Also call this function for filesystem requests. Introduce CMD_INITIALIZED to keep track of whether or not a request has already been initialized. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Brian King Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_cmnd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index f5afcff8d76f..a9f8f7e79d83 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -57,6 +57,9 @@ struct scsi_pointer { /* for scmd->flags */ #define SCMD_TAGGED (1 << 0) #define SCMD_UNCHECKED_ISA_DMA (1 << 1) +#define SCMD_INITIALIZED (1 << 3) +/* flags preserved across unprep / reprep */ +#define SCMD_PRESERVED_FLAGS (SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED) struct scsi_cmnd { struct scsi_request req; -- cgit From 482dca939fb7ee35ba20b944b4c2476133dbf0df Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 31 Aug 2017 15:05:44 -0700 Subject: bpf: Add mark and priority to sock options that can be set Add socket mark and priority to fields that can be set by ebpf program when a socket is created. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 08c206a863e1..ba848b761cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -758,6 +758,8 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 -- cgit From abcc61537e3566cae7f1fd225f2dcb82b3595fe3 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 31 Aug 2017 10:04:24 +0200 Subject: ANDROID: binder: Add BINDER_GET_NODE_DEBUG_INFO ioctl The BINDER_GET_NODE_DEBUG_INFO ioctl will return debug info on a node. Each successive call reusing the previous return value will return the next node. The data will be used by libmemunreachable to mark the pointers with kernel references as reachable. Signed-off-by: Colin Cross Signed-off-by: Martijn Coenen Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 7668b5791c91..84a9a0944e13 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -186,6 +186,19 @@ struct binder_version { #define BINDER_CURRENT_PROTOCOL_VERSION 8 #endif +/* + * Use with BINDER_GET_NODE_DEBUG_INFO, driver reads ptr, writes to all fields. + * Set ptr to NULL for the first call to get the info for the first node, and + * then repeat the call passing the previously returned value to get the next + * nodes. ptr will be 0 when there are no more nodes. + */ +struct binder_node_debug_info { + binder_uintptr_t ptr; + binder_uintptr_t cookie; + __u32 has_strong_ref; + __u32 has_weak_ref; +}; + #define BINDER_WRITE_READ _IOWR('b', 1, struct binder_write_read) #define BINDER_SET_IDLE_TIMEOUT _IOW('b', 3, __s64) #define BINDER_SET_MAX_THREADS _IOW('b', 5, __u32) @@ -193,6 +206,7 @@ struct binder_version { #define BINDER_SET_CONTEXT_MGR _IOW('b', 7, __s32) #define BINDER_THREAD_EXIT _IOW('b', 8, __s32) #define BINDER_VERSION _IOWR('b', 9, struct binder_version) +#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info) /* * NOTE: Two special error codes you should check for when calling -- cgit From b32dbc1e0bf0eaa8bdd725491b361d5fc2f57a85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 17:12:18 +0200 Subject: dma-coherent: remove the DMA_MEMORY_INCLUDES_CHILDREN flag This flag was never implemented or used. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-mapping.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4c98cc96971f..abf369b11f54 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -696,8 +696,7 @@ static inline int dma_get_cache_alignment(void) /* flags for the coherent memory api */ #define DMA_MEMORY_MAP 0x01 #define DMA_MEMORY_IO 0x02 -#define DMA_MEMORY_INCLUDES_CHILDREN 0x04 -#define DMA_MEMORY_EXCLUSIVE 0x08 +#define DMA_MEMORY_EXCLUSIVE 0x04 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, -- cgit From 2436bdcda53ff4abb7897c87fa29ef3de8055344 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 17:13:09 +0200 Subject: dma-coherent: remove the DMA_MEMORY_MAP and DMA_MEMORY_IO flags DMA_MEMORY_IO was never used in the tree, so remove it. That means there is no need for the DMA_MEMORY_MAP flag either now, so remove it as well and change dma_declare_coherent_memory to return a normal errno value. Signed-off-by: Christoph Hellwig Reviewed-by: Marek Szyprowski --- include/linux/dma-mapping.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index abf369b11f54..b7dd11c02a45 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -694,9 +694,7 @@ static inline int dma_get_cache_alignment(void) #endif /* flags for the coherent memory api */ -#define DMA_MEMORY_MAP 0x01 -#define DMA_MEMORY_IO 0x02 -#define DMA_MEMORY_EXCLUSIVE 0x04 +#define DMA_MEMORY_EXCLUSIVE 0x01 #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, @@ -709,7 +707,7 @@ static inline int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags) { - return 0; + return -ENOSYS; } static inline void -- cgit From a728f56094e7cf60a1dc0642fe86901d1a4dfb4e Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 17 Aug 2017 13:42:36 +0200 Subject: ASoC: make clock direction configurable in asoc-simple Some CPU drivers (e. g. davinci-mcasp) may require the system clock to be configured as OUT, while there's no good way currently to set SND_SOC_CLK_OUT in simple-soc driver if the clock is fixed-rate. This patch makes asoc_simple_card_init_dai() initialize clock to SND_SOCK_CLK_OUT if explicitly stated in the relevant dts file. This change is transparent and doesn't change the default behavior. Signed-off-by: Vitaly Wool Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 42c6a6ac3ce6..7e25afce6566 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -15,6 +15,7 @@ struct asoc_simple_dai { const char *name; unsigned int sysclk; + int clk_direction; int slots; int slot_width; unsigned int tx_slot_mask; -- cgit From 0890540e21cf1156b4cf960a4c1c734db4e816f9 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 30 Aug 2017 17:12:05 +0100 Subject: drm/i915: add GT number to intel_device_info Up to Coffeelake we could deduce this GT number from the device ID. This doesn't seem to be the case anymore. This change reorders pciids per GT and adds a gt field to intel_device_info. We set this field on the following platforms : - SNB/IVB/HSW/BDW/SKL/KBL/CFL/CNL Before & After : $ modinfo drivers/gpu/drm/i915/i915.ko | grep ^alias | wc -l 209 v2: Add SNB & IVB (Chris) v3: Fix compilation error in early-quirks (Lionel) v4: Fix inconsistency between FEATURE/PLATFORM macros (Ville) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20170830161208.29221-2-lionel.g.landwerlin@intel.com --- include/drm/i915_pciids.h | 152 +++++++++++++++++++++++++++++----------------- 1 file changed, 95 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 34c8f5600ce0..1257e15c1a03 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -118,92 +118,125 @@ #define INTEL_IRONLAKE_M_IDS(info) \ INTEL_VGA_DEVICE(0x0046, info) -#define INTEL_SNB_D_IDS(info) \ +#define INTEL_SNB_D_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x0102, info), \ - INTEL_VGA_DEVICE(0x0112, info), \ - INTEL_VGA_DEVICE(0x0122, info), \ INTEL_VGA_DEVICE(0x010A, info) -#define INTEL_SNB_M_IDS(info) \ - INTEL_VGA_DEVICE(0x0106, info), \ +#define INTEL_SNB_D_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0112, info), \ + INTEL_VGA_DEVICE(0x0122, info) + +#define INTEL_SNB_D_IDS(info) \ + INTEL_SNB_D_GT1_IDS(info), \ + INTEL_SNB_D_GT2_IDS(info) + +#define INTEL_SNB_M_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x0106, info) + +#define INTEL_SNB_M_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x0116, info), \ INTEL_VGA_DEVICE(0x0126, info) +#define INTEL_SNB_M_IDS(info) \ + INTEL_SNB_M_GT1_IDS(info), \ + INTEL_SNB_M_GT2_IDS(info) + +#define INTEL_IVB_M_GT1_IDS(info) \ + INTEL_VGA_DEVICE(0x0156, info) /* GT1 mobile */ + +#define INTEL_IVB_M_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ + #define INTEL_IVB_M_IDS(info) \ - INTEL_VGA_DEVICE(0x0156, info), /* GT1 mobile */ \ - INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ + INTEL_IVB_M_GT1_IDS(info), \ + INTEL_IVB_M_GT2_IDS(info) -#define INTEL_IVB_D_IDS(info) \ +#define INTEL_IVB_D_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \ + INTEL_VGA_DEVICE(0x015a, info) /* GT1 server */ + +#define INTEL_IVB_D_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \ - INTEL_VGA_DEVICE(0x015a, info), /* GT1 server */ \ INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */ +#define INTEL_IVB_D_IDS(info) \ + INTEL_IVB_D_GT1_IDS(info), \ + INTEL_IVB_D_GT2_IDS(info) + #define INTEL_IVB_Q_IDS(info) \ INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ -#define INTEL_HSW_IDS(info) \ +#define INTEL_HSW_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ INTEL_VGA_DEVICE(0x040a, info), /* GT1 server */ \ - INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \ - INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \ INTEL_VGA_DEVICE(0x040B, info), /* GT1 reserved */ \ - INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \ - INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \ INTEL_VGA_DEVICE(0x040E, info), /* GT1 reserved */ \ - INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \ - INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \ INTEL_VGA_DEVICE(0x0C02, info), /* SDV GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \ INTEL_VGA_DEVICE(0x0C0A, info), /* SDV GT1 server */ \ - INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \ - INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \ INTEL_VGA_DEVICE(0x0C0B, info), /* SDV GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \ INTEL_VGA_DEVICE(0x0C0E, info), /* SDV GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \ INTEL_VGA_DEVICE(0x0A02, info), /* ULT GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \ INTEL_VGA_DEVICE(0x0A0A, info), /* ULT GT1 server */ \ - INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \ - INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \ INTEL_VGA_DEVICE(0x0A0B, info), /* ULT GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \ INTEL_VGA_DEVICE(0x0D02, info), /* CRW GT1 desktop */ \ - INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \ - INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \ INTEL_VGA_DEVICE(0x0D0A, info), /* CRW GT1 server */ \ - INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \ - INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \ INTEL_VGA_DEVICE(0x0D0B, info), /* CRW GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ - INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ - INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0A0E, info), /* ULX GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0D06, info) /* CRW GT1 mobile */ + +#define INTEL_HSW_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \ + INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \ + INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \ + INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \ + INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \ INTEL_VGA_DEVICE(0x0C16, info), /* SDV GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \ - INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \ INTEL_VGA_DEVICE(0x0A16, info), /* ULT GT2 mobile */ \ - INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \ - INTEL_VGA_DEVICE(0x0A0E, info), /* ULX GT1 mobile */ \ INTEL_VGA_DEVICE(0x0A1E, info), /* ULX GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0D16, info) /* CRW GT2 mobile */ + +#define INTEL_HSW_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ + INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \ + INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \ + INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \ + INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \ + INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \ + INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D2E, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \ + INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \ INTEL_VGA_DEVICE(0x0A2E, info), /* ULT GT3 reserved */ \ - INTEL_VGA_DEVICE(0x0D06, info), /* CRW GT1 mobile */ \ - INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ +#define INTEL_HSW_IDS(info) \ + INTEL_HSW_GT1_IDS(info), \ + INTEL_HSW_GT2_IDS(info), \ + INTEL_HSW_GT3_IDS(info) + #define INTEL_VLV_IDS(info) \ INTEL_VGA_DEVICE(0x0f30, info), \ INTEL_VGA_DEVICE(0x0f31, info), \ @@ -212,17 +245,19 @@ INTEL_VGA_DEVICE(0x0157, info), \ INTEL_VGA_DEVICE(0x0155, info) -#define INTEL_BDW_GT12_IDS(info) \ +#define INTEL_BDW_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \ INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \ INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \ INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \ - INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ + INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ + INTEL_VGA_DEVICE(0x160D, info) /* GT1 Workstation */ + +#define INTEL_BDW_GT2_IDS(info) \ + INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \ INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \ INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \ - INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ - INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \ - INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \ + INTEL_VGA_DEVICE(0x161E, info), /* GT2 ULX */ \ INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \ INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */ @@ -243,7 +278,8 @@ INTEL_VGA_DEVICE(0x163D, info) /* Workstation */ #define INTEL_BDW_IDS(info) \ - INTEL_BDW_GT12_IDS(info), \ + INTEL_BDW_GT1_IDS(info), \ + INTEL_BDW_GT2_IDS(info), \ INTEL_BDW_GT3_IDS(info), \ INTEL_BDW_RSVD_IDS(info) @@ -335,20 +371,22 @@ INTEL_KBL_GT4_IDS(info) /* CFL S */ -#define INTEL_CFL_S_IDS(info) \ +#define INTEL_CFL_S_GT1_IDS(info) \ INTEL_VGA_DEVICE(0x3E90, info), /* SRV GT1 */ \ - INTEL_VGA_DEVICE(0x3E93, info), /* SRV GT1 */ \ + INTEL_VGA_DEVICE(0x3E93, info) /* SRV GT1 */ + +#define INTEL_CFL_S_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E91, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E92, info), /* SRV GT2 */ \ INTEL_VGA_DEVICE(0x3E96, info) /* SRV GT2 */ /* CFL H */ -#define INTEL_CFL_H_IDS(info) \ +#define INTEL_CFL_H_GT2_IDS(info) \ INTEL_VGA_DEVICE(0x3E9B, info), /* Halo GT2 */ \ INTEL_VGA_DEVICE(0x3E94, info) /* Halo GT2 */ /* CFL U */ -#define INTEL_CFL_U_IDS(info) \ +#define INTEL_CFL_U_GT3_IDS(info) \ INTEL_VGA_DEVICE(0x3EA6, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA7, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x3EA8, info), /* ULT GT3 */ \ -- cgit From 25cc72a33835ed8a6f53180a822cadab855852ac Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 10:52:31 +0200 Subject: mlxsw: spectrum: Forbid linking to devices that have uppers The mlxsw driver relies on NETDEV_CHANGEUPPER events to configure the device in case a port is enslaved to a master netdev such as bridge or bond. Since the driver ignores events unrelated to its ports and their uppers, it's possible to engineer situations in which the device's data path differs from the kernel's. One example to such a situation is when a port is enslaved to a bond that is already enslaved to a bridge. When the bond was enslaved the driver ignored the event - as the bond wasn't one of its uppers - and therefore a bridge port instance isn't created in the device. Until such configurations are supported forbid them by checking that the upper device doesn't have uppers of its own. Fixes: 0d65fc13042f ("mlxsw: spectrum: Implement LAG port join/leave") Signed-off-by: Ido Schimmel Reported-by: Nogah Frankel Tested-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 779b23595596..c99ba7914c0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3866,6 +3866,8 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); +bool netdev_has_any_upper_dev(struct net_device *dev); + void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, -- cgit From 799ea9e9c59949008770aab4e1da87f10e99dbe4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 18 Aug 2017 18:08:25 -0700 Subject: xfs: evict all inodes involved with log redo item When we introduced the bmap redo log items, we set MS_ACTIVE on the mountpoint and XFS_IRECOVERY on the inode to prevent unlinked inodes from being truncated prematurely during log recovery. This also had the effect of putting linked inodes on the lru instead of evicting them. Unfortunately, we neglected to find all those unreferenced lru inodes and evict them after finishing log recovery, which means that we leak them if anything goes wrong in the rest of xfs_mountfs, because the lru is only cleaned out on unmount. Therefore, evict unreferenced inodes in the lru list immediately after clearing MS_ACTIVE. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J. Wong Cc: viro@ZenIV.linux.org.uk Reviewed-by: Brian Foster --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..00f46e018370 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2831,6 +2831,7 @@ static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern unsigned int get_next_ino(void); +extern void evict_inodes(struct super_block *sb); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); -- cgit From 8db6c34f1dbc8e06aa016a9b829b06902c3e1340 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 May 2017 13:11:56 -0500 Subject: Introduce v3 namespaced file capabilities Root in a non-initial user ns cannot be trusted to write a traditional security.capability xattr. If it were allowed to do so, then any unprivileged user on the host could map his own uid to root in a private namespace, write the xattr, and execute the file with privilege on the host. However supporting file capabilities in a user namespace is very desirable. Not doing so means that any programs designed to run with limited privilege must continue to support other methods of gaining and dropping privilege. For instance a program installer must detect whether file capabilities can be assigned, and assign them if so but set setuid-root otherwise. The program in turn must know how to drop partial capabilities, and do so only if setuid-root. This patch introduces v3 of the security.capability xattr. It builds a vfs_ns_cap_data struct by appending a uid_t rootid to struct vfs_cap_data. This is the absolute uid_t (that is, the uid_t in user namespace which mounted the filesystem, usually init_user_ns) of the root id in whose namespaces the file capabilities may take effect. When a task asks to write a v2 security.capability xattr, if it is privileged with respect to the userns which mounted the filesystem, then nothing should change. Otherwise, the kernel will transparently rewrite the xattr as a v3 with the appropriate rootid. This is done during the execution of setxattr() to catch user-space-initiated capability writes. Subsequently, any task executing the file which has the noted kuid as its root uid, or which is in a descendent user_ns of such a user_ns, will run the file with capabilities. Similarly when asking to read file capabilities, a v3 capability will be presented as v2 if it applies to the caller's namespace. If a task writes a v3 security.capability, then it can provide a uid for the xattr so long as the uid is valid in its own user namespace, and it is privileged with CAP_SETFCAP over its namespace. The kernel will translate that rootid to an absolute uid, and write that to disk. After this, a task in the writer's namespace will not be able to use those capabilities (unless rootid was 0), but a task in a namespace where the given uid is root will. Only a single security.capability xattr may exist at a time for a given file. A task may overwrite an existing xattr so long as it is privileged over the inode. Note this is a departure from previous semantics, which required privilege to remove a security.capability xattr. This check can be re-added if deemed useful. This allows a simple setxattr to work, allows tar/untar to work, and allows us to tar in one namespace and untar in another while preserving the capability, without risking leaking privilege into a parent namespace. Example using tar: $ cp /bin/sleep sleepx $ mkdir b1 b2 $ lxc-usernsexec -m b:0:100000:1 -m b:1:$(id -u):1 -- chown 0:0 b1 $ lxc-usernsexec -m b:0:100001:1 -m b:1:$(id -u):1 -- chown 0:0 b2 $ lxc-usernsexec -m b:0:100000:1000 -- tar --xattrs-include=security.capability --xattrs -cf b1/sleepx.tar sleepx $ lxc-usernsexec -m b:0:100001:1000 -- tar --xattrs-include=security.capability --xattrs -C b2 -xf b1/sleepx.tar $ lxc-usernsexec -m b:0:100001:1000 -- getcap b2/sleepx b2/sleepx = cap_sys_admin+ep # /opt/ltp/testcases/bin/getv3xattr b2/sleepx v3 xattr, rootid is 100001 A patch to linux-test-project adding a new set of tests for this functionality is in the nsfscaps branch at github.com/hallyn/ltp Changelog: Nov 02 2016: fix invalid check at refuse_fcap_overwrite() Nov 07 2016: convert rootid from and to fs user_ns (From ebiederm: mar 28 2017) commoncap.c: fix typos - s/v4/v3 get_vfs_caps_from_disk: clarify the fs_ns root access check nsfscaps: change the code split for cap_inode_setxattr() Apr 09 2017: don't return v3 cap for caps owned by current root. return a v2 cap for a true v2 cap in non-init ns Apr 18 2017: . Change the flow of fscap writing to support s_user_ns writing. . Remove refuse_fcap_overwrite(). The value of the previous xattr doesn't matter. Apr 24 2017: . incorporate Eric's incremental diff . move cap_convert_nscap to setxattr and simplify its usage May 8, 2017: . fix leaking dentry refcount in cap_inode_getsecurity Signed-off-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/linux/capability.h | 2 ++ include/linux/security.h | 2 ++ include/uapi/linux/capability.h | 22 +++++++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 6ffb67e10c06..b52e278e4744 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -248,4 +248,6 @@ extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); +extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); + #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..6fff8c924718 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -91,6 +91,8 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_inode_getsecurity(struct inode *inode, const char *name, + void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); extern int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 6fe14d001f68..230e05d35191 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -60,9 +60,13 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32_2 2 #define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2)) -#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2 -#define VFS_CAP_U32 VFS_CAP_U32_2 -#define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#define VFS_CAP_REVISION_3 0x03000000 +#define VFS_CAP_U32_3 2 +#define XATTR_CAPS_SZ_3 (sizeof(__le32)*(2 + 2*VFS_CAP_U32_3)) + +#define XATTR_CAPS_SZ XATTR_CAPS_SZ_3 +#define VFS_CAP_U32 VFS_CAP_U32_3 +#define VFS_CAP_REVISION VFS_CAP_REVISION_3 struct vfs_cap_data { __le32 magic_etc; /* Little endian */ @@ -72,6 +76,18 @@ struct vfs_cap_data { } data[VFS_CAP_U32]; }; +/* + * same as vfs_cap_data but with a rootid at the end + */ +struct vfs_ns_cap_data { + __le32 magic_etc; + struct { + __le32 permitted; /* Little endian */ + __le32 inheritable; /* Little endian */ + } data[VFS_CAP_U32]; + __le32 rootid; +}; + #ifndef __KERNEL__ /* -- cgit From d897246df9fc0a5df97a784bf7b072be4a6ae479 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 31 Aug 2017 13:47:35 -0700 Subject: fsmap: fix documentation of FMR_OF_LAST The FMR_OF_LAST flag is set on the last fsmap record being returned for the dataset requested, contrary to what the header file says. Fix the docs to reflect the behavior of all fsmap implementations. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- include/uapi/linux/fsmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h index 7e8e5f0bd6d2..e5213c3e38b2 100644 --- a/include/uapi/linux/fsmap.h +++ b/include/uapi/linux/fsmap.h @@ -96,7 +96,7 @@ fsmap_advance( #define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */ #define FMR_OF_SHARED 0x8 /* segment = shared with another file */ #define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */ -#define FMR_OF_LAST 0x20 /* segment is the last in the FS */ +#define FMR_OF_LAST 0x20 /* segment is the last in the dataset */ /* Each FS gets to define its own special owner codes. */ #define FMR_OWNER(type, code) (((__u64)type << 32) | \ -- cgit From 65bce46298d064dff9db1282e17bb26602715819 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 1 Sep 2017 13:41:17 -0700 Subject: Bluetooth: make baswap src const Signed-off-by: Loic Poulain Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 01487192f628..020142bb9735 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -233,7 +233,7 @@ static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src) memcpy(dst, src, sizeof(bdaddr_t)); } -void baswap(bdaddr_t *dst, bdaddr_t *src); +void baswap(bdaddr_t *dst, const bdaddr_t *src); /* Common socket structures and functions */ -- cgit From 0142626d08223b0f6ad04859301b53178f11c317 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 1 Sep 2017 16:35:20 -0500 Subject: PCI: Correct kernel-doc of pci_vpd_srdt_size(), pci_vpd_srdt_tag() The kernel-doc comments don't match the arguments, so fix the comments. Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..7950fc697394 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2059,7 +2059,7 @@ static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt) /** * pci_vpd_srdt_size - Extracts the Small Resource Data Type length - * @lrdt: Pointer to the beginning of the Small Resource Data Type tag + * @srdt: Pointer to the beginning of the Small Resource Data Type tag * * Returns the extracted Small Resource Data Type length. */ @@ -2070,7 +2070,7 @@ static inline u8 pci_vpd_srdt_size(const u8 *srdt) /** * pci_vpd_srdt_tag - Extracts the Small Resource Data Type Tag Item - * @lrdt: Pointer to the beginning of the Small Resource Data Type tag + * @srdt: Pointer to the beginning of the Small Resource Data Type tag * * Returns the extracted Small Resource Data Type Tag Item. */ -- cgit From 96291d565550c1fd363e488cc17cb3189d2e4cc2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 1 Sep 2017 16:35:50 -0500 Subject: PCI: Fix typos and whitespace errors Fix various typos and whitespace errors: s/Synopsis/Synopsys/ s/Designware/DesignWare/ s/Keystine/Keystone/ s/gpio/GPIO/ s/pcie/PCIe/ s/phy/PHY/ s/confgiruation/configuration/ No functional change intended. Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 5 ++--- include/linux/pcieport_if.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/aer.h b/include/linux/aer.h index 04602cbe85dc..43799bd17a02 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -39,7 +39,7 @@ struct aer_capability_regs { }; #if defined(CONFIG_PCIEAER) -/* pci-e port driver needs this function to enable aer */ +/* PCIe port driver needs this function to enable AER */ int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); @@ -67,7 +67,6 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, - int severity, - struct aer_capability_regs *aer_regs); + int severity, struct aer_capability_regs *aer_regs); #endif //_AER_H_ diff --git a/include/linux/pcieport_if.h b/include/linux/pcieport_if.h index afcd130ab3a9..12542f5ac83d 100644 --- a/include/linux/pcieport_if.h +++ b/include/linux/pcieport_if.h @@ -38,7 +38,7 @@ static inline void set_service_data(struct pcie_device *dev, void *data) dev->priv_data = data; } -static inline void* get_service_data(struct pcie_device *dev) +static inline void *get_service_data(struct pcie_device *dev) { return dev->priv_data; } -- cgit From 0865805d82d4c822647ee35ab2629c48cc40706b Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Thu, 10 Aug 2017 08:34:03 +0200 Subject: clk: at91: add audio pll clock drivers This new clock driver set allows to have a fractional divided clock that would generate a precise clock particularly suitable for audio applications. The main audio pll clock has two children clocks: one that is connected to the PMC, the other that can directly drive a pad. As these two routes have different enable bits and different dividers and divider formulas, they are handled by two different drivers. Each of them could modify the rate of the main audio pll parent. The main audio pll clock can output 620MHz to 700MHz. Signed-off-by: Nicolas Ferre Signed-off-by: Quentin Schulz Acked-by: Boris Brezillon Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 17f413bbbedf..6aca5ce8a99a 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -185,4 +185,29 @@ #define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ #define AT91_PMC_PCR_GCKEN (0x1 << 29) /* GCK Enable */ +#define AT91_PMC_AUDIO_PLL0 0x14c +#define AT91_PMC_AUDIO_PLL_PLLEN (1 << 0) +#define AT91_PMC_AUDIO_PLL_PADEN (1 << 1) +#define AT91_PMC_AUDIO_PLL_PMCEN (1 << 2) +#define AT91_PMC_AUDIO_PLL_RESETN (1 << 3) +#define AT91_PMC_AUDIO_PLL_ND_OFFSET 8 +#define AT91_PMC_AUDIO_PLL_ND_MASK (0x7f << AT91_PMC_AUDIO_PLL_ND_OFFSET) +#define AT91_PMC_AUDIO_PLL_ND(n) ((n) << AT91_PMC_AUDIO_PLL_ND_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPMC_OFFSET 16 +#define AT91_PMC_AUDIO_PLL_QDPMC_MASK (0x7f << AT91_PMC_AUDIO_PLL_QDPMC_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPMC(n) ((n) << AT91_PMC_AUDIO_PLL_QDPMC_OFFSET) + +#define AT91_PMC_AUDIO_PLL1 0x150 +#define AT91_PMC_AUDIO_PLL_FRACR_MASK 0x3fffff +#define AT91_PMC_AUDIO_PLL_QDPAD_OFFSET 24 +#define AT91_PMC_AUDIO_PLL_QDPAD_MASK (0x7f << AT91_PMC_AUDIO_PLL_QDPAD_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPAD(n) ((n) << AT91_PMC_AUDIO_PLL_QDPAD_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPAD_DIV_OFFSET AT91_PMC_AUDIO_PLL_QDPAD_OFFSET +#define AT91_PMC_AUDIO_PLL_QDPAD_DIV_MASK (0x3 << AT91_PMC_AUDIO_PLL_QDPAD_DIV_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPAD_DIV(n) ((n) << AT91_PMC_AUDIO_PLL_QDPAD_DIV_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_OFFSET 26 +#define AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_MAX 0x1f +#define AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_MASK (AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_MAX << AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_OFFSET) +#define AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV(n) ((n) << AT91_PMC_AUDIO_PLL_QDPAD_EXTDIV_OFFSET) + #endif -- cgit From 7c89717f82bd305e3102963485f3da160d11bcf6 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 27 Aug 2017 22:34:53 -0700 Subject: remoteproc: Introduce rproc handle accessor for children In certain circumstances rpmsg devices needs to acquire a handle to the ancestor remoteproc instance, e.g. to invoke rproc_report_crash() when a fatal error is detected. Introduce an interface that walks the device tree in search for a remoteproc instance and return this. Tested-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 81da49564ff4..44e630eb3d94 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -510,6 +510,8 @@ struct rproc_vdev { }; struct rproc *rproc_get_by_phandle(phandle phandle); +struct rproc *rproc_get_by_child(struct device *dev); + struct rproc *rproc_alloc(struct device *dev, const char *name, const struct rproc_ops *ops, const char *firmware, int len); -- cgit From b37e88407c1d78f157778d73427cd7e9e1d6369d Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 31 Aug 2017 09:59:38 -0700 Subject: inet_diag: allow protocols to provide additional data Extend inet_diag_handler to allow individual protocols to report additional data on INET_DIAG_INFO through idiag_get_aux. The size can be dynamic and is computed by idiag_get_aux_size. Signed-off-by: Ivan Delalande Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 65da430e260f..ee251c585854 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -25,6 +25,13 @@ struct inet_diag_handler { struct inet_diag_msg *r, void *info); + int (*idiag_get_aux)(struct sock *sk, + bool net_admin, + struct sk_buff *skb); + + size_t (*idiag_get_aux_size)(struct sock *sk, + bool net_admin); + int (*destroy)(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req); -- cgit From c03fa9bcacd9ac04595cc13f34f3445f0a5ecf13 Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 31 Aug 2017 09:59:39 -0700 Subject: tcp_diag: report TCP MD5 signing keys and addresses Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is not possible to retrieve these from the kernel once they have been configured on sockets. Signed-off-by: Ivan Delalande Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 1 + include/uapi/linux/tcp.h | 9 +++++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 678496897a68..f52ff62bfabe 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,6 +143,7 @@ enum { INET_DIAG_MARK, INET_DIAG_BBRINFO, INET_DIAG_CLASS_ID, + INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 030e594bab45..15c25eccab2b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -256,4 +256,13 @@ struct tcp_md5sig { __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ }; +/* INET_DIAG_MD5SIG */ +struct tcp_diag_md5sig { + __u8 tcpm_family; + __u8 tcpm_prefixlen; + __u16 tcpm_keylen; + __be32 tcpm_addr[4]; + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; +}; + #endif /* _UAPI_LINUX_TCP_H */ -- cgit From c1d1b437816f0afa99202be3cb650c9d174667bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2017 16:48:22 -0700 Subject: net: convert (struct ubuf_info)->refcnt to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. v2: added the change in drivers/vhost/net.c as spotted by Willem. Signed-off-by: Eric Dumazet Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f93cc01064cb..f751f3b93039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -456,7 +457,7 @@ struct ubuf_info { u32 bytelen; }; }; - atomic_t refcnt; + refcount_t refcnt; struct mmpin { struct user_struct *user; @@ -472,7 +473,7 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, static inline void sock_zerocopy_get(struct ubuf_info *uarg) { - atomic_inc(&uarg->refcnt); + refcount_inc(&uarg->refcnt); } void sock_zerocopy_put(struct ubuf_info *uarg); -- cgit From 864150dfa31dceab6ec5ca4579a2d35ede985cb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 1 Sep 2017 12:15:17 +0300 Subject: net: Add module reference to FIB notifiers When a listener registers to the FIB notification chain it receives a dump of the FIB entries and rules from existing address families by invoking their dump operations. While we call into these modules we need to make sure they aren't removed. Do that by increasing their reference count before invoking their dump operations and decrease it afterwards. Fixes: 04b1d4e50e82 ("net: core: Make the FIB notification chain generic") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 241475224f74..669b9716dc7a 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -2,6 +2,7 @@ #define __NET_FIB_NOTIFIER_H #include +#include #include #include @@ -26,6 +27,7 @@ struct fib_notifier_ops { struct list_head list; unsigned int (*fib_seq_read)(struct net *net); int (*fib_dump)(struct net *net, struct notifier_block *nb); + struct module *owner; struct rcu_head rcu; }; -- cgit From 604acb193b8383a3774673c324e576e4c6b4ffcd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 5 Jun 2017 11:17:20 +0300 Subject: net/mlx5e: Refactor data-path lro header function Refactor function mlx5e_lro_update_hdr() to reduce number of branches. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3c7442b56460..7031d655ec32 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -709,7 +709,7 @@ static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) return (cqe->op_own >> 2) & 0x3; } -static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } -- cgit From 64327fc811268d4a24de03dac242ea29de6be75f Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 31 Aug 2017 18:11:41 +0200 Subject: ipv4: Don't override return code from ip_route_input_noref() After ip_route_input() calls ip_route_input_noref(), another check on skb_dst() is done, but if this fails, we shouldn't override the return code from ip_route_input_noref(), as it could have been more specific (i.e. -EHOSTUNREACH). This also saves one call to skb_dst_force_safe() and one to skb_dst() in case the ip_route_input_noref() check fails. Reported-by: Sabrina Dubroca Fixes: 9df16efadd2a ("ipv4: call dst_hold_safe() properly") Signed-off-by: Stefano Brivio Acked-by: Wei Wang Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/route.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index cb0a76d9dde1..1b09a9368c68 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -189,10 +189,11 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); - if (!err) + if (!err) { skb_dst_force_safe(skb); - if (!skb_dst(skb)) - err = -EINVAL; + if (!skb_dst(skb)) + err = -EINVAL; + } rcu_read_unlock(); return err; -- cgit From fb452a1aa3fd4034d7999e309c5466ff2d7005aa Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:08 +0200 Subject: Revert "net: use lib/percpu_counter API for fragmentation mem accounting" This reverts commit 6d7b857d541ecd1d9bd997c97242d4ef94b19de2. There is a bug in fragmentation codes use of the percpu_counter API, that can cause issues on systems with many CPUs. The frag_mem_limit() just reads the global counter (fbc->count), without considering other CPUs can have upto batch size (130K) that haven't been subtracted yet. Due to the 3MBytes lower thresh limit, this become dangerous at >=24 CPUs (3*1024*1024/130000=24). The correct API usage would be to use __percpu_counter_compare() which does the right thing, and takes into account the number of (online) CPUs and batch size, to account for this and call __percpu_counter_sum() when needed. We choose to revert the use of the lib/percpu_counter API for frag memory accounting for several reasons: 1) On systems with CPUs > 24, the heavier fully locked __percpu_counter_sum() is always invoked, which will be more expensive than the atomic_t that is reverted to. Given systems with more than 24 CPUs are becoming common this doesn't seem like a good option. To mitigate this, the batch size could be decreased and thresh be increased. 2) The add_frag_mem_limit+sub_frag_mem_limit pairs happen on the RX CPU, before SKBs are pushed into sockets on remote CPUs. Given NICs can only hash on L2 part of the IP-header, the NIC-RXq's will likely be limited. Thus, a fair chance that atomic add+dec happen on the same CPU. Revert note that commit 1d6119baf061 ("net: fix percpu memory leaks") removed init_frag_mem_limit() and instead use inet_frags_init_net(). After this revert, inet_frags_uninit_net() becomes empty. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 6fdcd2427776..fa635aa6d0b9 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -1,14 +1,9 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include - struct netns_frags { - /* The percpu_counter "mem" need to be cacheline aligned. - * mem.count must not share cacheline with other writers - */ - struct percpu_counter mem ____cacheline_aligned_in_smp; - + /* Keep atomic mem on separate cachelines in structs that include it */ + atomic_t mem ____cacheline_aligned_in_smp; /* sysctls */ int timeout; int high_thresh; @@ -110,11 +105,11 @@ void inet_frags_fini(struct inet_frags *); static inline int inet_frags_init_net(struct netns_frags *nf) { - return percpu_counter_init(&nf->mem, 0, GFP_KERNEL); + atomic_set(&nf->mem, 0); + return 0; } static inline void inet_frags_uninit_net(struct netns_frags *nf) { - percpu_counter_destroy(&nf->mem); } void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); @@ -140,31 +135,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -/* The default percpu_counter batch size is not big enough to scale to - * fragmentation mem acct sizes. - * The mem size of a 64K fragment is approx: - * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes - */ -static unsigned int frag_percpu_counter_batch = 130000; - static inline int frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_read(&nf->mem); + return atomic_read(&nf->mem); } static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch); + atomic_sub(i, &nf->mem); } static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch); + atomic_add(i, &nf->mem); } -static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) +static inline int sum_frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_sum_positive(&nf->mem); + return atomic_read(&nf->mem); } /* RFC 3168 support : -- cgit From 5a63643e583b6a9789d7a225ae076fb4e603991c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Sep 2017 11:26:13 +0200 Subject: Revert "net: fix percpu memory leaks" This reverts commit 1d6119baf0610f813eb9d9580eb4fd16de5b4ceb. After reverting commit 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") then here is no need for this fix-up patch. As percpu_counter is no longer used, it cannot memory leak it any-longer. Fixes: 6d7b857d541e ("net: use lib/percpu_counter API for fragmentation mem accounting") Fixes: 1d6119baf061 ("net: fix percpu memory leaks") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index fa635aa6d0b9..fc59e0775e00 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -103,15 +103,10 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline void inet_frags_init_net(struct netns_frags *nf) { atomic_set(&nf->mem, 0); - return 0; } -static inline void inet_frags_uninit_net(struct netns_frags *nf) -{ -} - void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -- cgit From b9047726386bb538cf5e4f52a9f04eb556eebc67 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:11 -0700 Subject: ipc: mqueue: Replace timespec with timespec64 struct timespec is not y2038 safe. Replace all uses of timespec by y2038 safe struct timespec64. Even though timespec is used here to represent timeouts, replace these with timespec64 so that it facilitates in verification by creating a y2038 safe kernel image that is free of timespec. The syscall interfaces themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: Deepa Dinamani Cc: Paul Moore Cc: Richard Guy Briggs Reviewed-by: Richard Guy Briggs Reviewed-by: Arnd Bergmann Acked-by: Paul Moore Signed-off-by: Al Viro --- include/linux/audit.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2150bdccfbab..74d4d4e8e3db 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -351,7 +351,7 @@ extern int __audit_socketcall(int nargs, unsigned long *args); extern int __audit_sockaddr(int len, void *addr); extern void __audit_fd_pair(int fd1, int fd2); extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr); -extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout); +extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout); extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification); extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, @@ -412,7 +412,7 @@ static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) if (unlikely(!audit_dummy_context())) __audit_mq_open(oflag, mode, attr); } -static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout) +static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec64 *abs_timeout) { if (unlikely(!audit_dummy_context())) __audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout); @@ -549,7 +549,7 @@ static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { } static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, - const struct timespec *abs_timeout) + const struct timespec64 *abs_timeout) { } static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification) -- cgit From 50578ea97a2a352c109bd1657e667b212faf2cbb Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:12 -0700 Subject: ipc: msg: Make msg_queue timestamps y2038 safe time_t is not y2038 safe. Replace all uses of time_t by y2038 safe time64_t. Similarly, replace the calls to get_seconds() with y2038 safe ktime_get_real_seconds(). Note that this preserves fast access on 64 bit systems, but 32 bit systems need sequence counters. The syscall interfaces themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/msg.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index 4e5ec3cbf464..05115342daa3 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -2,6 +2,7 @@ #define _LINUX_MSG_H #include +#include #include /* one msg_msg structure for each message */ @@ -17,9 +18,9 @@ struct msg_msg { /* one msq_queue structure for each present queue on the system */ struct msg_queue { struct kern_ipc_perm q_perm; - time_t q_stime; /* last msgsnd time */ - time_t q_rtime; /* last msgrcv time */ - time_t q_ctime; /* last change time */ + time64_t q_stime; /* last msgsnd time */ + time64_t q_rtime; /* last msgrcv time */ + time64_t q_ctime; /* last change time */ unsigned long q_cbytes; /* current number of bytes on queue */ unsigned long q_qnum; /* number of messages in queue */ unsigned long q_qbytes; /* max number of bytes on queue */ -- cgit From e54d02b23c5eed3aa0ffe54e659dfe1c9084c262 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:13 -0700 Subject: ipc: sem: Make sem_array timestamps y2038 safe time_t is not y2038 safe. Replace all uses of time_t by y2038 safe time64_t. Similarly, replace the calls to get_seconds() with y2038 safe ktime_get_real_seconds(). Note that this preserves fast access on 64 bit systems, but 32 bit systems need sequence counters. The syscall interface themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/sem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sem.h b/include/linux/sem.h index be5cf2ea14ad..867f86f1a20b 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -4,6 +4,7 @@ #include #include #include +#include #include struct task_struct; @@ -30,7 +31,7 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ - time_t sem_ctime; /* create/last semctl() time */ + time64_t sem_ctime; /* create/last semctl() time */ struct list_head pending_alter; /* pending operations */ /* that alter the array */ struct list_head pending_const; /* pending complex operations */ -- cgit From 7ff2819e8dd5b528887dfbe4ff395f5d2142edff Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:14 -0700 Subject: ipc: shm: Make shmid_kernel timestamps y2038 safe time_t is not y2038 safe. Replace all uses of time_t by y2038 safe time64_t. Similarly, replace the calls to get_seconds() with y2038 safe ktime_get_real_seconds(). Note that this preserves fast access on 64 bit systems, but 32 bit systems need sequence counters. The syscall interfaces themselves are not changed as part of the patch. They will be part of a different series. Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/shm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/shm.h b/include/linux/shm.h index 04e881829625..7bb897b25309 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -12,9 +12,9 @@ struct shmid_kernel /* private to the kernel */ struct file *shm_file; unsigned long shm_nattch; unsigned long shm_segsz; - time_t shm_atim; - time_t shm_dtim; - time_t shm_ctim; + time64_t shm_atim; + time64_t shm_dtim; + time64_t shm_ctim; pid_t shm_cprid; pid_t shm_lprid; struct user_struct *mlock_user; -- cgit From aaed2dd8a31359e5767ee099ecbb078d55be4d29 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 2 Aug 2017 19:51:15 -0700 Subject: utimes: Make utimes y2038 safe struct timespec is not y2038 safe on 32 bit machines. Replace timespec with y2038 safe struct timespec64. Note that the patch only changes the internals without modifying the syscall interfaces. This will be part of a separate series. Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/time.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 4abb32d4c6b8..3d0cd017f0d7 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -178,7 +178,7 @@ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); -extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); +extern long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags); /* * Similar to the struct tm in userspace , but it needs to be here so -- cgit From b24a5f293058b512f1685930f2983a20ee3e15ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Sep 2017 19:15:07 +0300 Subject: ALSA: atmel: Remove leftovers of AVR32 removal The ALSA related include header files are left overs after the commit 020c5260c2b1 ("ALSA: atmel: Remove AVR32 bits from the driver") Fixes: 020c5260c2b1 ("ALSA: atmel: Remove AVR32 bits from the driver") Signed-off-by: Andy Shevchenko Signed-off-by: Takashi Iwai --- include/sound/atmel-abdac.h | 23 ----------------------- include/sound/atmel-ac97c.h | 38 -------------------------------------- 2 files changed, 61 deletions(-) delete mode 100644 include/sound/atmel-abdac.h delete mode 100644 include/sound/atmel-ac97c.h (limited to 'include') diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h deleted file mode 100644 index a8f735d677fa..000000000000 --- a/include/sound/atmel-abdac.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Driver for the Atmel Audio Bitstream DAC (ABDAC) - * - * Copyright (C) 2009 Atmel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ -#ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H -#define __INCLUDE_SOUND_ATMEL_ABDAC_H - -#include - -/** - * struct atmel_abdac_pdata - board specific ABDAC configuration - * @dws: DMA slave interface to use for sound playback. - */ -struct atmel_abdac_pdata { - struct dw_dma_slave dws; -}; - -#endif /* __INCLUDE_SOUND_ATMEL_ABDAC_H */ diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h deleted file mode 100644 index f2a1cdc37661..000000000000 --- a/include/sound/atmel-ac97c.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Driver for the Atmel AC97C controller - * - * Copyright (C) 2005-2009 Atmel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - */ -#ifndef __INCLUDE_SOUND_ATMEL_AC97C_H -#define __INCLUDE_SOUND_ATMEL_AC97C_H - -#include - -#define AC97C_CAPTURE 0x01 -#define AC97C_PLAYBACK 0x02 -#define AC97C_BOTH (AC97C_CAPTURE | AC97C_PLAYBACK) - -/** - * struct atmel_ac97c_pdata - board specific AC97C configuration - * @rx_dws: DMA slave interface to use for sound capture. - * @tx_dws: DMA slave interface to use for sound playback. - * @reset_pin: GPIO pin wired to the reset input on the external AC97 codec, - * optional to use, set to -ENODEV if not in use. AC97 layer will - * try to do a software reset of the external codec anyway. - * - * If the user do not want to use a DMA channel for playback or capture, i.e. - * only one feature is required on the board. The slave for playback or capture - * can be set to NULL. The AC97C driver will take use of this when setting up - * the sound streams. - */ -struct ac97c_platform_data { - struct dw_dma_slave rx_dws; - struct dw_dma_slave tx_dws; - int reset_pin; -}; - -#endif /* __INCLUDE_SOUND_ATMEL_AC97C_H */ -- cgit From bea74641e3786d51dcf1175527cc1781420961c9 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Fri, 18 Aug 2017 20:58:59 +0200 Subject: netfilter: xt_hashlimit: add rate match mode This patch adds a new feature to hashlimit that allows matching on the current packet/byte rate without rate limiting. This can be enabled with a new flag --hashlimit-rate-match. The match returns true if the current rate of packets is above/below the user specified value. The main difference between the existing algorithm and the new one is that the existing algorithm rate-limits the flow whereas the new algorithm does not. Instead it *classifies* the flow based on whether it is above or below a certain rate. I will demonstrate this with an example below. Let us assume this rule: iptables -A INPUT -m hashlimit --hashlimit-above 10/s -j new_chain If the packet rate is 15/s, the existing algorithm would ACCEPT 10 packets every second and send 5 packets to "new_chain". But with the new algorithm, as long as the rate of 15/s is sustained, all packets will continue to match and every packet is sent to new_chain. This new functionality will let us classify different flows based on their current rate, so that further decisions can be made on them based on what the current rate is. This is how the new algorithm works: We divide time into intervals of 1 (sec/min/hour) as specified by the user. We keep track of the number of packets/bytes processed in the current interval. After each interval we reset the counter to 0. When we receive a packet for match, we look at the packet rate during the current interval and the previous interval to make a decision: if [ prev_rate < user and cur_rate < user ] return Below else return Above Where cur_rate is the number of packets/bytes seen in the current interval, prev is the number of packets/bytes seen in the previous interval and 'user' is the rate specified by the user. We also provide flexibility to the user for choosing the time interval using the option --hashilmit-interval. For example the user can keep a low rate like x/hour but still keep the interval as small as 1 second. To preserve backwards compatibility we have to add this feature in a new revision, so I've created revision 3 for hashlimit. The two new options we add are: --hashlimit-rate-match --hashlimit-rate-interval I have updated the help text to add these new options. Also added a few tests for the new options. Suggested-by: Igor Lubashev Reviewed-by: Josh Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_hashlimit.h | 3 ++- include/uapi/linux/netfilter/xt_hashlimit.h | 36 ++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h index 074790c0cf74..0fc458bde80b 100644 --- a/include/linux/netfilter/xt_hashlimit.h +++ b/include/linux/netfilter/xt_hashlimit.h @@ -5,5 +5,6 @@ #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \ - XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES) + XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\ + XT_HASHLIMIT_RATE_MATCH) #endif /*_XT_HASHLIMIT_H*/ diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 79da349f1060..aa98573248b1 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -19,12 +19,13 @@ struct xt_hashlimit_htable; enum { - XT_HASHLIMIT_HASH_DIP = 1 << 0, - XT_HASHLIMIT_HASH_DPT = 1 << 1, - XT_HASHLIMIT_HASH_SIP = 1 << 2, - XT_HASHLIMIT_HASH_SPT = 1 << 3, - XT_HASHLIMIT_INVERT = 1 << 4, - XT_HASHLIMIT_BYTES = 1 << 5, + XT_HASHLIMIT_HASH_DIP = 1 << 0, + XT_HASHLIMIT_HASH_DPT = 1 << 1, + XT_HASHLIMIT_HASH_SIP = 1 << 2, + XT_HASHLIMIT_HASH_SPT = 1 << 3, + XT_HASHLIMIT_INVERT = 1 << 4, + XT_HASHLIMIT_BYTES = 1 << 5, + XT_HASHLIMIT_RATE_MATCH = 1 << 6, }; struct hashlimit_cfg { @@ -79,6 +80,21 @@ struct hashlimit_cfg2 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg3 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u32 interval; + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -95,4 +111,12 @@ struct xt_hashlimit_mtinfo2 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo3 { + char name[NAME_MAX]; + struct hashlimit_cfg3 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ -- cgit From dfc46034b54af3abf594de75a1ee43ef2ec2a60a Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 23 Aug 2017 22:41:23 +0200 Subject: netfilter: nf_tables: add select_ops for stateful objects This patch adds support for overloading stateful objects operations through the select_ops() callback, just as it is implemented for expressions. This change is needed for upcoming additions to the stateful objects infrastructure. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f9795fe394f3..0f5b12a4ad09 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1007,12 +1007,12 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * * @list: table stateful object list node * @table: table this object belongs to - * @type: pointer to object type - * @data: pointer to object data * @name: name of this stateful object * @genmask: generation mask * @use: number of references to this stateful object * @data: object data, layout depends on type + * @ops: object operations + * @data: pointer to object data */ struct nft_object { struct list_head list; @@ -1021,7 +1021,7 @@ struct nft_object { u32 genmask:2, use:30; /* runtime data below here */ - const struct nft_object_type *type ____cacheline_aligned; + const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] __attribute__((aligned(__alignof__(u64)))); }; @@ -1044,27 +1044,39 @@ void nft_obj_notify(struct net *net, struct nft_table *table, /** * struct nft_object_type - stateful object type * - * @eval: stateful object evaluation function + * @select_ops: function to select nft_object_ops + * @ops: default ops, used when no select_ops functions is present * @list: list node in list of object types * @type: stateful object numeric type - * @size: stateful object size * @owner: module owner * @maxattr: maximum netlink attribute * @policy: netlink attribute policy + */ +struct nft_object_type { + const struct nft_object_ops *(*select_ops)(const struct nft_ctx *, + const struct nlattr * const tb[]); + const struct nft_object_ops *ops; + struct list_head list; + u32 type; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; +}; + +/** + * struct nft_object_ops - stateful object operations + * + * @eval: stateful object evaluation function + * @size: stateful object size * @init: initialize object from netlink attributes * @destroy: release existing stateful object * @dump: netlink dump stateful object */ -struct nft_object_type { +struct nft_object_ops { void (*eval)(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt); - struct list_head list; - u32 type; unsigned int size; - unsigned int maxattr; - struct module *owner; - const struct nla_policy *policy; int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); @@ -1072,6 +1084,7 @@ struct nft_object_type { int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); + const struct nft_object_type *type; }; int nft_register_obj(struct nft_object_type *obj_type); -- cgit From a691205571723cb0544110ca91653ac4b0eb5b17 Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Wed, 23 Aug 2017 22:41:25 +0200 Subject: netfilter: nft_limit: add stateful object type Register a new limit stateful object type into the stateful object infrastructure. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b49da72efa68..871afa4871bf 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1282,7 +1282,8 @@ enum nft_ct_helper_attributes { #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 #define NFT_OBJECT_CT_HELPER 3 -#define __NFT_OBJECT_MAX 4 +#define NFT_OBJECT_LIMIT 4 +#define __NFT_OBJECT_MAX 5 #define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** -- cgit From d1c1e39de8357d66163da39e893e38ea1410e8f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Aug 2017 12:04:10 +0200 Subject: netfilter: remove unused hooknum arg from packet functions tested with allmodconfig build. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack_l4proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d4933d56809d..738a0307a96b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -43,7 +43,6 @@ struct nf_conntrack_l4proto { unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts); /* Called when a new connection for this protocol found; -- cgit From 44d6e2f27328b254111dd716fde45b3b59b8a4f7 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:11 +0530 Subject: net: Replace NF_CT_ASSERT() with WARN_ON(). This patch removes NF_CT_ASSERT() and instead uses WARN_ON(). Signed-off-by: Varsha Rao --- include/net/netfilter/nf_conntrack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 6e6f678aaac7..0385cb08c478 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -159,7 +159,7 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) /* decrement reference count on a conntrack */ static inline void nf_ct_put(struct nf_conn *ct) { - NF_CT_ASSERT(ct); + WARN_ON(!ct); nf_conntrack_put(&ct->ct_general); } -- cgit From 9efdb14f76f4d7591cd4d7a436ebd716b19703b6 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Wed, 30 Aug 2017 13:37:12 +0530 Subject: net: Remove CONFIG_NETFILTER_DEBUG and _ASSERT() macros. This patch removes CONFIG_NETFILTER_DEBUG and _ASSERT() macros as they are no longer required. Replace _ASSERT() macros with WARN_ON(). Signed-off-by: Varsha Rao Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 0385cb08c478..fdc9c64a1c94 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -44,12 +44,6 @@ union nf_conntrack_expect_proto { #include #include -#ifdef CONFIG_NETFILTER_DEBUG -#define NF_CT_ASSERT(x) WARN_ON(!(x)) -#else -#define NF_CT_ASSERT(x) -#endif - #include #include -- cgit From 095a7e388b5fbf8958686a90a04fe9387f6aa50b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:12 -0700 Subject: irqchip: mips-gic: Remove counter access functions The MIPS GIC clocksource driver is no longer using the accessor functions provided by the irqchip driver, so remove them. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17022/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 2b0e56619e53..c9e1c993cf5b 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -40,11 +40,6 @@ #define GIC_SH_CONFIG_OFS 0x0000 -/* Shared Global Counter */ -#define GIC_SH_COUNTER_31_00_OFS 0x0010 -/* 64-bit counter register for CM3 */ -#define GIC_SH_COUNTER_OFS GIC_SH_COUNTER_31_00_OFS -#define GIC_SH_COUNTER_63_32_OFS 0x0014 #define GIC_SH_REVISIONID_OFS 0x0020 /* Convert an interrupt number to a byte offset/bit for multi-word registers */ @@ -107,10 +102,6 @@ #define GIC_VPE_WD_CONFIG0_OFS 0x0090 #define GIC_VPE_WD_COUNT0_OFS 0x0094 #define GIC_VPE_WD_INITIAL0_OFS 0x0098 -#define GIC_VPE_COMPARE_LO_OFS 0x00a0 -/* 64-bit Compare register on CM3 */ -#define GIC_VPE_COMPARE_OFS GIC_VPE_COMPARE_LO_OFS -#define GIC_VPE_COMPARE_HI_OFS 0x00a4 #define GIC_VPE_EIC_SHADOW_SET_BASE_OFS 0x0100 #define GIC_VPE_EIC_SS(intr) (4 * (intr)) @@ -128,12 +119,6 @@ #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 /* Masks */ -#define GIC_SH_CONFIG_COUNTSTOP_SHF 28 -#define GIC_SH_CONFIG_COUNTSTOP_MSK (MSK(1) << GIC_SH_CONFIG_COUNTSTOP_SHF) - -#define GIC_SH_CONFIG_COUNTBITS_SHF 24 -#define GIC_SH_CONFIG_COUNTBITS_MSK (MSK(4) << GIC_SH_CONFIG_COUNTBITS_SHF) - #define GIC_SH_CONFIG_NUMINTRS_SHF 16 #define GIC_SH_CONFIG_NUMINTRS_MSK (MSK(8) << GIC_SH_CONFIG_NUMINTRS_SHF) @@ -258,13 +243,6 @@ extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, unsigned int cpu_vec, unsigned int irqbase); -extern u64 gic_read_count(void); -extern unsigned int gic_get_count_width(void); -extern u64 gic_read_compare(void); -extern void gic_write_compare(u64 cnt); -extern void gic_write_cpu_compare(u64 cnt, int cpu); -extern void gic_start_count(void); -extern void gic_stop_count(void); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); -- cgit From 9762d2e6d329a500dc6c07156e3c56aab3992471 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:14 -0700 Subject: irqchip: mips-gic: Remove gic_read_local_vp_id() Nothing needs gic_read_local_vp_id() any longer, so remove the dead code. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17024/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index c9e1c993cf5b..29453bdc06e2 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -98,7 +98,6 @@ #define GIC_VPE_SWINT0_MAP_OFS 0x0054 #define GIC_VPE_SWINT1_MAP_OFS 0x0058 #define GIC_VPE_OTHER_ADDR_OFS 0x0080 -#define GIC_VP_IDENT_OFS 0x0088 #define GIC_VPE_WD_CONFIG0_OFS 0x0090 #define GIC_VPE_WD_COUNT0_OFS 0x0094 #define GIC_VPE_WD_INITIAL0_OFS 0x0098 @@ -197,10 +196,6 @@ #define GIC_VPE_SMASK_FDC_SHF 6 #define GIC_VPE_SMASK_FDC_MSK (MSK(1) << GIC_VPE_SMASK_FDC_SHF) -/* GIC_VP_IDENT fields */ -#define GIC_VP_IDENT_VCNUM_SHF 0 -#define GIC_VP_IDENT_VCNUM_MSK (MSK(6) << GIC_VP_IDENT_VCNUM_SHF) - /* GIC nomenclature for Core Interrupt Pins. */ #define GIC_CPU_INT0 0 /* Core Interrupt 2 */ #define GIC_CPU_INT1 1 /* . */ @@ -260,16 +255,4 @@ static inline int gic_get_usm_range(struct resource *gic_usm_res) #endif /* CONFIG_MIPS_GIC */ -/** - * gic_read_local_vp_id() - read the local VPs VCNUM - * - * Read the VCNUM of the local VP from the GIC_VP_IDENT register and - * return it to the caller. This ID should be used to refer to the VP - * via the GICs VP-other region, or when calculating an offset to a - * bit representing the VP in interrupt masks. - * - * Return: The VCNUM value for the local VP. - */ -extern unsigned gic_read_local_vp_id(void); - #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From e98fcb2a8cc003c37c24713d2303667a8f624a30 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:16 -0700 Subject: irqchip: mips-gic: Simplify shared interrupt pending/mask reads Simplify the reads of the bitmaps indicating pending & masked interrupts in gic_handle_shared_int() using the __ioread32_copy() & __ioread64_copy() helper functions. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17026/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 29453bdc06e2..835e25506660 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -68,12 +68,6 @@ #define GIC_SH_RMASK_OFS 0x0300 #define GIC_SH_SMASK_OFS 0x0380 -/* Global Interrupt Mask Register (RO) - Bit Set == Interrupt enabled */ -#define GIC_SH_MASK_OFS 0x0400 - -/* Pending Global Interrupts (RO) */ -#define GIC_SH_PEND_OFS 0x0480 - /* Maps Interrupt X to a Pin */ #define GIC_SH_INTR_MAP_TO_PIN_BASE_OFS 0x0500 #define GIC_SH_MAP_TO_PIN(intr) (4 * (intr)) -- cgit From a0dc5cb5e31bdbcf1f1dddf62a62d06d6b82d53a Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:17 -0700 Subject: irqchip: mips-gic: Simplify gic_local_irq_domain_map() Simplify gic_local_irq_domain_map() by: - Moving the check for invalid IRQs outside of the loop. - Moving the decision about whether to use gic_cpu_pin or timer_cpu_pin outside of the loop. - Using the new write_gic_vo_map() accessor function to avoid the need to handle each map register separately. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17027/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 835e25506660..1342b17b6812 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -84,13 +84,7 @@ #define GIC_VPE_MASK_OFS 0x0008 #define GIC_VPE_RMASK_OFS 0x000c #define GIC_VPE_SMASK_OFS 0x0010 -#define GIC_VPE_WD_MAP_OFS 0x0040 -#define GIC_VPE_COMPARE_MAP_OFS 0x0044 #define GIC_VPE_TIMER_MAP_OFS 0x0048 -#define GIC_VPE_FDC_MAP_OFS 0x004c -#define GIC_VPE_PERFCTR_MAP_OFS 0x0050 -#define GIC_VPE_SWINT0_MAP_OFS 0x0054 -#define GIC_VPE_SWINT1_MAP_OFS 0x0058 #define GIC_VPE_OTHER_ADDR_OFS 0x0080 #define GIC_VPE_WD_CONFIG0_OFS 0x0090 #define GIC_VPE_WD_COUNT0_OFS 0x0094 -- cgit From 87554b0ef3884143563e090375269730780c6617 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:18 -0700 Subject: irqchip: mips-gic: Drop gic_(re)set_mask() functions The gic_set_mask() & gic_reset_mask() functions are now no more convenient to call than the write_gic_smask() or write_gic_rmask() accessor functions. Remove the layer of abstraction. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17028/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 1342b17b6812..8160cc8b677d 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -64,10 +64,6 @@ /* Set/Clear corresponding bit in Edge Detect Register */ #define GIC_SH_WEDGE_OFS 0x0280 -/* Mask manipulation */ -#define GIC_SH_RMASK_OFS 0x0300 -#define GIC_SH_SMASK_OFS 0x0380 - /* Maps Interrupt X to a Pin */ #define GIC_SH_INTR_MAP_TO_PIN_BASE_OFS 0x0500 #define GIC_SH_MAP_TO_PIN(intr) (4 * (intr)) -- cgit From 80e5f9c9e295b08c5c588083a4ad35e1e5f78731 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:19 -0700 Subject: irqchip: mips-gic: Remove gic_set_polarity() Remove the gic_set_polarity() function in favour of using the new change_gic_pol() accessor function which provides equivalent functionality. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17029/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 8160cc8b677d..960e49a64e7a 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -14,8 +14,6 @@ #define GIC_MAX_INTRS 256 /* Constants */ -#define GIC_POL_POS 1 -#define GIC_POL_NEG 0 #define GIC_TRIG_EDGE 1 #define GIC_TRIG_LEVEL 0 #define GIC_TRIG_DUAL_ENABLE 1 @@ -52,9 +50,6 @@ }) #define GIC_INTR_BIT(intr) ((intr) % (mips_cm_is64 ? 64 : 32)) -/* Polarity : Reset Value is always 0 */ -#define GIC_SH_SET_POLARITY_OFS 0x0100 - /* Triggering : Reset Value is always 0 */ #define GIC_SH_SET_TRIGGER_OFS 0x0180 -- cgit From 471aa962a6acc19a990e20fa3846db40e62120cc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:20 -0700 Subject: irqchip: mips-gic: Remove gic_set_trigger() Remove the gic_set_trigger() function in favour of using the new change_gic_trig() accessor function which provides equivalent functionality. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17030/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 960e49a64e7a..2e9f0c43d425 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -14,8 +14,6 @@ #define GIC_MAX_INTRS 256 /* Constants */ -#define GIC_TRIG_EDGE 1 -#define GIC_TRIG_LEVEL 0 #define GIC_TRIG_DUAL_ENABLE 1 #define GIC_TRIG_DUAL_DISABLE 0 @@ -50,9 +48,6 @@ }) #define GIC_INTR_BIT(intr) ((intr) % (mips_cm_is64 ? 64 : 32)) -/* Triggering : Reset Value is always 0 */ -#define GIC_SH_SET_TRIGGER_OFS 0x0180 - /* Dual edge triggering : Reset Value is always 0 */ #define GIC_SH_SET_DUAL_OFS 0x0200 -- cgit From c26ba670cdb84e0556436be7bf68a75a1d4f4d76 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:21 -0700 Subject: irqchip: mips-gic: Remove gic_set_dual_edge() Remove the gic_set_dual_edge() function in favour of using the new change_gic_dual() accessor function which provides equivalent functionality. This also allows us to remove the gic_update_bits() function which gic_set_dual_edge() was the last user of, along with the GIC_INTR_OFS() & GIC_INTR_BIT() macros. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17031/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 2e9f0c43d425..bd348fc9db18 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -13,10 +13,6 @@ #define GIC_MAX_INTRS 256 -/* Constants */ -#define GIC_TRIG_DUAL_ENABLE 1 -#define GIC_TRIG_DUAL_DISABLE 0 - #define MSK(n) ((1 << (n)) - 1) /* Accessors */ @@ -38,19 +34,6 @@ #define GIC_SH_REVISIONID_OFS 0x0020 -/* Convert an interrupt number to a byte offset/bit for multi-word registers */ -#define GIC_INTR_OFS(intr) ({ \ - unsigned bits = mips_cm_is64 ? 64 : 32; \ - unsigned reg_idx = (intr) / bits; \ - unsigned reg_width = bits / 8; \ - \ - reg_idx * reg_width; \ -}) -#define GIC_INTR_BIT(intr) ((intr) % (mips_cm_is64 ? 64 : 32)) - -/* Dual edge triggering : Reset Value is always 0 */ -#define GIC_SH_SET_DUAL_OFS 0x0200 - /* Set/Clear corresponding bit in Edge Detect Register */ #define GIC_SH_WEDGE_OFS 0x0280 -- cgit From d3e8cf44792d60047ba45036abfc38ab414e49e5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:22 -0700 Subject: irqchip: mips-gic: Remove gic_map_to_pin() Remove the gic_map_to_pin() function in favour of using the new write_gic_map_pin() accessor function which isn't any more complex to use & allows us to drop a level of abstraction. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17032/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index bd348fc9db18..dea79a7a54cc 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -37,10 +37,6 @@ /* Set/Clear corresponding bit in Edge Detect Register */ #define GIC_SH_WEDGE_OFS 0x0280 -/* Maps Interrupt X to a Pin */ -#define GIC_SH_INTR_MAP_TO_PIN_BASE_OFS 0x0500 -#define GIC_SH_MAP_TO_PIN(intr) (4 * (intr)) - /* Maps Interrupt X to a VPE */ #define GIC_SH_INTR_MAP_TO_VPE_BASE_OFS 0x2000 #define GIC_SH_MAP_TO_VPE_REG_OFF(intr, vpe) \ @@ -84,12 +80,6 @@ #define GIC_SH_WEDGE_SET(intr) ((intr) | (0x1 << 31)) #define GIC_SH_WEDGE_CLR(intr) ((intr) & ~(0x1 << 31)) -#define GIC_MAP_TO_PIN_SHF 31 -#define GIC_MAP_TO_PIN_MSK (MSK(1) << GIC_MAP_TO_PIN_SHF) -#define GIC_MAP_TO_NMI_SHF 30 -#define GIC_MAP_TO_NMI_MSK (MSK(1) << GIC_MAP_TO_NMI_SHF) -#define GIC_MAP_TO_YQ_SHF 29 -#define GIC_MAP_TO_YQ_MSK (MSK(1) << GIC_MAP_TO_YQ_SHF) #define GIC_MAP_SHF 0 #define GIC_MAP_MSK (MSK(6) << GIC_MAP_SHF) -- cgit From 0efe3cbf1504309bf088f74eb627d0987f67e934 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:23 -0700 Subject: irqchip: mips-gic: Remove gic_map_to_vpe() Remove the gic_map_to_vpe() function in favour of using the new write_gic_map_vp() accessor function which isn't any more complex to use & allows us to drop a level of abstraction. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17033/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index dea79a7a54cc..ad8b216b6056 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -37,12 +37,6 @@ /* Set/Clear corresponding bit in Edge Detect Register */ #define GIC_SH_WEDGE_OFS 0x0280 -/* Maps Interrupt X to a VPE */ -#define GIC_SH_INTR_MAP_TO_VPE_BASE_OFS 0x2000 -#define GIC_SH_MAP_TO_VPE_REG_OFF(intr, vpe) \ - ((32 * (intr)) + (((vpe) / 32) * 4)) -#define GIC_SH_MAP_TO_VPE_REG_BIT(vpe) (1 << ((vpe) % 32)) - /* Register Map for Local Section */ #define GIC_VPE_CTL_OFS 0x0000 #define GIC_VPE_PEND_OFS 0x0004 -- cgit From 3680746abd87e733ec836025115580562b3dcb47 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:24 -0700 Subject: irqchip: mips-gic: Convert remaining shared reg access to new accessors Convert the remaining accesses to registers in the GIC shared register block to use the new accessor functions provided by asm/mips-gic.h, resulting in code which is often shorter & easier to read. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17034/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index ad8b216b6056..f0a60770d775 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -19,8 +19,6 @@ #define GIC_REG(segment, offset) (segment##_##SECTION_OFS + offset##_##OFS) /* GIC Address Space */ -#define SHARED_SECTION_OFS 0x0000 -#define SHARED_SECTION_SIZE 0x8000 #define VPE_LOCAL_SECTION_OFS 0x8000 #define VPE_LOCAL_SECTION_SIZE 0x4000 #define VPE_OTHER_SECTION_OFS 0xc000 @@ -28,15 +26,6 @@ #define USM_VISIBLE_SECTION_OFS 0x10000 #define USM_VISIBLE_SECTION_SIZE 0x10000 -/* Register Map for Shared Section */ - -#define GIC_SH_CONFIG_OFS 0x0000 - -#define GIC_SH_REVISIONID_OFS 0x0020 - -/* Set/Clear corresponding bit in Edge Detect Register */ -#define GIC_SH_WEDGE_OFS 0x0280 - /* Register Map for Local Section */ #define GIC_VPE_CTL_OFS 0x0000 #define GIC_VPE_PEND_OFS 0x0004 @@ -65,15 +54,6 @@ #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 /* Masks */ -#define GIC_SH_CONFIG_NUMINTRS_SHF 16 -#define GIC_SH_CONFIG_NUMINTRS_MSK (MSK(8) << GIC_SH_CONFIG_NUMINTRS_SHF) - -#define GIC_SH_CONFIG_NUMVPES_SHF 0 -#define GIC_SH_CONFIG_NUMVPES_MSK (MSK(8) << GIC_SH_CONFIG_NUMVPES_SHF) - -#define GIC_SH_WEDGE_SET(intr) ((intr) | (0x1 << 31)) -#define GIC_SH_WEDGE_CLR(intr) ((intr) & ~(0x1 << 31)) - #define GIC_MAP_SHF 0 #define GIC_MAP_MSK (MSK(6) << GIC_MAP_SHF) -- cgit From 9da3c64589e4eae68631b1b5ed31c586be6ad923 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:25 -0700 Subject: irqchip: mips-gic: Convert local int mask access to new accessors Use the new accessor functions provided by asm/mips-gic.h to access masks controlling local interrupts, resulting in code which is often shorter & easier to read. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17035/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 52 ---------------------------------------- 1 file changed, 52 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index f0a60770d775..011698962a8d 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -28,10 +28,6 @@ /* Register Map for Local Section */ #define GIC_VPE_CTL_OFS 0x0000 -#define GIC_VPE_PEND_OFS 0x0004 -#define GIC_VPE_MASK_OFS 0x0008 -#define GIC_VPE_RMASK_OFS 0x000c -#define GIC_VPE_SMASK_OFS 0x0010 #define GIC_VPE_TIMER_MAP_OFS 0x0048 #define GIC_VPE_OTHER_ADDR_OFS 0x0080 #define GIC_VPE_WD_CONFIG0_OFS 0x0090 @@ -69,54 +65,6 @@ #define GIC_VPE_CTL_EIC_MODE_SHF 0 #define GIC_VPE_CTL_EIC_MODE_MSK (MSK(1) << GIC_VPE_CTL_EIC_MODE_SHF) -/* GIC_VPE_PEND Masks */ -#define GIC_VPE_PEND_WD_SHF 0 -#define GIC_VPE_PEND_WD_MSK (MSK(1) << GIC_VPE_PEND_WD_SHF) -#define GIC_VPE_PEND_CMP_SHF 1 -#define GIC_VPE_PEND_CMP_MSK (MSK(1) << GIC_VPE_PEND_CMP_SHF) -#define GIC_VPE_PEND_TIMER_SHF 2 -#define GIC_VPE_PEND_TIMER_MSK (MSK(1) << GIC_VPE_PEND_TIMER_SHF) -#define GIC_VPE_PEND_PERFCOUNT_SHF 3 -#define GIC_VPE_PEND_PERFCOUNT_MSK (MSK(1) << GIC_VPE_PEND_PERFCOUNT_SHF) -#define GIC_VPE_PEND_SWINT0_SHF 4 -#define GIC_VPE_PEND_SWINT0_MSK (MSK(1) << GIC_VPE_PEND_SWINT0_SHF) -#define GIC_VPE_PEND_SWINT1_SHF 5 -#define GIC_VPE_PEND_SWINT1_MSK (MSK(1) << GIC_VPE_PEND_SWINT1_SHF) -#define GIC_VPE_PEND_FDC_SHF 6 -#define GIC_VPE_PEND_FDC_MSK (MSK(1) << GIC_VPE_PEND_FDC_SHF) - -/* GIC_VPE_RMASK Masks */ -#define GIC_VPE_RMASK_WD_SHF 0 -#define GIC_VPE_RMASK_WD_MSK (MSK(1) << GIC_VPE_RMASK_WD_SHF) -#define GIC_VPE_RMASK_CMP_SHF 1 -#define GIC_VPE_RMASK_CMP_MSK (MSK(1) << GIC_VPE_RMASK_CMP_SHF) -#define GIC_VPE_RMASK_TIMER_SHF 2 -#define GIC_VPE_RMASK_TIMER_MSK (MSK(1) << GIC_VPE_RMASK_TIMER_SHF) -#define GIC_VPE_RMASK_PERFCNT_SHF 3 -#define GIC_VPE_RMASK_PERFCNT_MSK (MSK(1) << GIC_VPE_RMASK_PERFCNT_SHF) -#define GIC_VPE_RMASK_SWINT0_SHF 4 -#define GIC_VPE_RMASK_SWINT0_MSK (MSK(1) << GIC_VPE_RMASK_SWINT0_SHF) -#define GIC_VPE_RMASK_SWINT1_SHF 5 -#define GIC_VPE_RMASK_SWINT1_MSK (MSK(1) << GIC_VPE_RMASK_SWINT1_SHF) -#define GIC_VPE_RMASK_FDC_SHF 6 -#define GIC_VPE_RMASK_FDC_MSK (MSK(1) << GIC_VPE_RMASK_FDC_SHF) - -/* GIC_VPE_SMASK Masks */ -#define GIC_VPE_SMASK_WD_SHF 0 -#define GIC_VPE_SMASK_WD_MSK (MSK(1) << GIC_VPE_SMASK_WD_SHF) -#define GIC_VPE_SMASK_CMP_SHF 1 -#define GIC_VPE_SMASK_CMP_MSK (MSK(1) << GIC_VPE_SMASK_CMP_SHF) -#define GIC_VPE_SMASK_TIMER_SHF 2 -#define GIC_VPE_SMASK_TIMER_MSK (MSK(1) << GIC_VPE_SMASK_TIMER_SHF) -#define GIC_VPE_SMASK_PERFCNT_SHF 3 -#define GIC_VPE_SMASK_PERFCNT_MSK (MSK(1) << GIC_VPE_SMASK_PERFCNT_SHF) -#define GIC_VPE_SMASK_SWINT0_SHF 4 -#define GIC_VPE_SMASK_SWINT0_MSK (MSK(1) << GIC_VPE_SMASK_SWINT0_SHF) -#define GIC_VPE_SMASK_SWINT1_SHF 5 -#define GIC_VPE_SMASK_SWINT1_MSK (MSK(1) << GIC_VPE_SMASK_SWINT1_SHF) -#define GIC_VPE_SMASK_FDC_SHF 6 -#define GIC_VPE_SMASK_FDC_MSK (MSK(1) << GIC_VPE_SMASK_FDC_SHF) - /* GIC nomenclature for Core Interrupt Pins. */ #define GIC_CPU_INT0 0 /* Core Interrupt 2 */ #define GIC_CPU_INT1 1 /* . */ -- cgit From 0d0cf58cd6814ed63deb67fc5f4c27ad725075b1 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:26 -0700 Subject: irqchip: mips-gic: Convert remaining local reg access to new accessors Convert the remaining accesses to registers in the GIC VP-local & VP-other register blocks to use the new accessor functions provided by asm/mips-gic.h, resulting in code which is often shorter & easier to read. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17036/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 44 ---------------------------------------- 1 file changed, 44 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 011698962a8d..b7a3ce1da9a7 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -13,58 +13,14 @@ #define GIC_MAX_INTRS 256 -#define MSK(n) ((1 << (n)) - 1) - -/* Accessors */ -#define GIC_REG(segment, offset) (segment##_##SECTION_OFS + offset##_##OFS) - /* GIC Address Space */ -#define VPE_LOCAL_SECTION_OFS 0x8000 -#define VPE_LOCAL_SECTION_SIZE 0x4000 -#define VPE_OTHER_SECTION_OFS 0xc000 -#define VPE_OTHER_SECTION_SIZE 0x4000 #define USM_VISIBLE_SECTION_OFS 0x10000 #define USM_VISIBLE_SECTION_SIZE 0x10000 -/* Register Map for Local Section */ -#define GIC_VPE_CTL_OFS 0x0000 -#define GIC_VPE_TIMER_MAP_OFS 0x0048 -#define GIC_VPE_OTHER_ADDR_OFS 0x0080 -#define GIC_VPE_WD_CONFIG0_OFS 0x0090 -#define GIC_VPE_WD_COUNT0_OFS 0x0094 -#define GIC_VPE_WD_INITIAL0_OFS 0x0098 - -#define GIC_VPE_EIC_SHADOW_SET_BASE_OFS 0x0100 -#define GIC_VPE_EIC_SS(intr) (4 * (intr)) - -#define GIC_VPE_EIC_VEC_BASE_OFS 0x0800 -#define GIC_VPE_EIC_VEC(intr) (4 * (intr)) - -#define GIC_VPE_TENABLE_NMI_OFS 0x1000 -#define GIC_VPE_TENABLE_YQ_OFS 0x1004 -#define GIC_VPE_TENABLE_INT_31_0_OFS 0x1080 -#define GIC_VPE_TENABLE_INT_63_32_OFS 0x1084 - /* User Mode Visible Section Register Map */ #define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 -/* Masks */ -#define GIC_MAP_SHF 0 -#define GIC_MAP_MSK (MSK(6) << GIC_MAP_SHF) - -/* GIC_VPE_CTL Masks */ -#define GIC_VPE_CTL_FDC_RTBL_SHF 4 -#define GIC_VPE_CTL_FDC_RTBL_MSK (MSK(1) << GIC_VPE_CTL_FDC_RTBL_SHF) -#define GIC_VPE_CTL_SWINT_RTBL_SHF 3 -#define GIC_VPE_CTL_SWINT_RTBL_MSK (MSK(1) << GIC_VPE_CTL_SWINT_RTBL_SHF) -#define GIC_VPE_CTL_PERFCNT_RTBL_SHF 2 -#define GIC_VPE_CTL_PERFCNT_RTBL_MSK (MSK(1) << GIC_VPE_CTL_PERFCNT_RTBL_SHF) -#define GIC_VPE_CTL_TIMER_RTBL_SHF 1 -#define GIC_VPE_CTL_TIMER_RTBL_MSK (MSK(1) << GIC_VPE_CTL_TIMER_RTBL_SHF) -#define GIC_VPE_CTL_EIC_MODE_SHF 0 -#define GIC_VPE_CTL_EIC_MODE_MSK (MSK(1) << GIC_VPE_CTL_EIC_MODE_SHF) - /* GIC nomenclature for Core Interrupt Pins. */ #define GIC_CPU_INT0 0 /* Core Interrupt 2 */ #define GIC_CPU_INT1 1 /* . */ -- cgit From ba9cc4352e9c40ab3b158620a1bd9cd53295ea17 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:27 -0700 Subject: MIPS: GIC: Move GIC_LOCAL_INT_* to asm/mips-gic.h Move the definition of VP-local interrupts provided by the MIPS Global Interrupt Controller to the new asm/mips-gic.h header to be alongside the new accessor functions. Whilst at it, convert to an enum which lends itself more easily to expansion & documentation. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17037/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index b7a3ce1da9a7..9546947d1842 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -38,16 +38,6 @@ /* Mapped interrupt to pin X, then GIC will generate the vector (X+1). */ #define GIC_PIN_TO_VEC_OFFSET 1 -/* Local GIC interrupts. */ -#define GIC_LOCAL_INT_WD 0 /* GIC watchdog */ -#define GIC_LOCAL_INT_COMPARE 1 /* GIC count and compare timer */ -#define GIC_LOCAL_INT_TIMER 2 /* CPU timer interrupt */ -#define GIC_LOCAL_INT_PERFCTR 3 /* CPU performance counter */ -#define GIC_LOCAL_INT_SWINT0 4 /* CPU software interrupt 0 */ -#define GIC_LOCAL_INT_SWINT1 5 /* CPU software interrupt 1 */ -#define GIC_LOCAL_INT_FDC 6 /* CPU fast debug channel */ -#define GIC_NUM_LOCAL_INTRS 7 - /* Convert between local/shared IRQ number and GIC HW IRQ number. */ #define GIC_LOCAL_HWIRQ_BASE 0 #define GIC_LOCAL_TO_HWIRQ(x) (GIC_LOCAL_HWIRQ_BASE + (x)) -- cgit From 3ee50dcbef374651056ea42d5eb543bbacb3ff41 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:28 -0700 Subject: irqchip: mips-gic: Remove GIC_CPU_INT* macros The GIC_CPU_INT* macros are never used. Remove the dead code. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17038/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 9546947d1842..e93aaf529baa 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -21,14 +21,6 @@ #define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 -/* GIC nomenclature for Core Interrupt Pins. */ -#define GIC_CPU_INT0 0 /* Core Interrupt 2 */ -#define GIC_CPU_INT1 1 /* . */ -#define GIC_CPU_INT2 2 /* . */ -#define GIC_CPU_INT3 3 /* . */ -#define GIC_CPU_INT4 4 /* . */ -#define GIC_CPU_INT5 5 /* Core Interrupt 7 */ - /* Add 2 to convert GIC CPU pin to core interrupt */ #define GIC_CPU_PIN_OFFSET 2 -- cgit From b11d4c1f5a3ac68fa163b0daeca1f98ba328c9de Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:29 -0700 Subject: irqchip: mips-gic: Move various definitions to the driver Move the definitions of macros used to convert between hardware IRQ numbers & shared or local interrupt numbers into the irqchip driver, which is all that should ever need to care about them. Remove GIC_CPU_TO_VEC_OFFSET() in the process since it's never used. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17039/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index e93aaf529baa..da02a146b292 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -11,8 +11,6 @@ #include #include -#define GIC_MAX_INTRS 256 - /* GIC Address Space */ #define USM_VISIBLE_SECTION_OFS 0x10000 #define USM_VISIBLE_SECTION_SIZE 0x10000 @@ -21,23 +19,6 @@ #define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 -/* Add 2 to convert GIC CPU pin to core interrupt */ -#define GIC_CPU_PIN_OFFSET 2 - -/* Add 2 to convert non-EIC hardware interrupt to EIC vector number. */ -#define GIC_CPU_TO_VEC_OFFSET 2 - -/* Mapped interrupt to pin X, then GIC will generate the vector (X+1). */ -#define GIC_PIN_TO_VEC_OFFSET 1 - -/* Convert between local/shared IRQ number and GIC HW IRQ number. */ -#define GIC_LOCAL_HWIRQ_BASE 0 -#define GIC_LOCAL_TO_HWIRQ(x) (GIC_LOCAL_HWIRQ_BASE + (x)) -#define GIC_HWIRQ_TO_LOCAL(x) ((x) - GIC_LOCAL_HWIRQ_BASE) -#define GIC_SHARED_HWIRQ_BASE GIC_NUM_LOCAL_INTRS -#define GIC_SHARED_TO_HWIRQ(x) (GIC_SHARED_HWIRQ_BASE + (x)) -#define GIC_HWIRQ_TO_SHARED(x) ((x) - GIC_SHARED_HWIRQ_BASE) - #ifdef CONFIG_MIPS_GIC extern unsigned int gic_present; -- cgit From 84103814a2cfd3561ff00bd7317c22f40f9e0dad Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:31 -0700 Subject: irqchip: mips-gic: Remove gic_get_usm_range() The MIPS VDSO code is no longer reliant upon the irqchip driver to provide the address of the GIC's user-visible section via gic_get_usm_range(). Remove the now-dead code. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17041/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index da02a146b292..843e1bb49767 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -11,10 +11,6 @@ #include #include -/* GIC Address Space */ -#define USM_VISIBLE_SECTION_OFS 0x10000 -#define USM_VISIBLE_SECTION_SIZE 0x10000 - /* User Mode Visible Section Register Map */ #define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 @@ -29,18 +25,11 @@ extern void gic_init(unsigned long gic_base_addr, extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); -extern int gic_get_usm_range(struct resource *gic_usm_res); #else /* CONFIG_MIPS_GIC */ #define gic_present 0 -static inline int gic_get_usm_range(struct resource *gic_usm_res) -{ - /* Shouldn't be called. */ - return -1; -} - #endif /* CONFIG_MIPS_GIC */ #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From 85eec73ce4c4322b3b442b9582f6656abb5d125f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:33 -0700 Subject: irqchip: mips-gic: Remove gic_init() All in-tree platforms now probe the GIC driver using device tree, and as such nothing calls gic_init() any longer. Remove the dead code. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17043/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 843e1bb49767..63c5f87034c9 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -19,9 +19,6 @@ extern unsigned int gic_present; -extern void gic_init(unsigned long gic_base_addr, - unsigned long gic_addrspace_size, unsigned int cpu_vec, - unsigned int irqbase); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); -- cgit From 56d7b61dc6d4e13cef786c8e4a2bfe43e4db932d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:35 -0700 Subject: irqchip: mips-gic: Remove gic_present Nothing uses the global gic_present variable anymore; mips_gic_present() should be used instead. Remove the dead code. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17045/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 63c5f87034c9..113010c8dd8b 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -17,16 +17,10 @@ #ifdef CONFIG_MIPS_GIC -extern unsigned int gic_present; - extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); -#else /* CONFIG_MIPS_GIC */ - -#define gic_present 0 - #endif /* CONFIG_MIPS_GIC */ #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From dd0163508c07a67b28befe5af23d7ab9941ae8ca Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:36 -0700 Subject: irqchip: mips-gic: Move gic_get_c0_*_int() to asm/mips-gic.h The linux/irqchip/mips-gic.h header is now almost empty. Move the declarations of gic_get_c0_compare_int(), gic_get_c0_perfcount_int() & gic_get_c0_fdc_int() to asm/mips-gic.h in order to close in on being able to delete the former header. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17046/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 113010c8dd8b..277d5be03a57 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -15,12 +15,4 @@ #define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 #define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 -#ifdef CONFIG_MIPS_GIC - -extern int gic_get_c0_compare_int(void); -extern int gic_get_c0_perfcount_int(void); -extern int gic_get_c0_fdc_int(void); - -#endif /* CONFIG_MIPS_GIC */ - #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From 16ae123e89d603a69d980bd76c4bb686f219ba0e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:37 -0700 Subject: MIPS: VDSO: Avoid use of linux/irqchip/mips-gic.h Our VDSO code makes use of macros from linux/irqchip/mips-gic.h to provide offsets to register values, but these are trivial offsets to the two 32 bit halves of a 64 bit value. Replace use of the macros with zero (ie. omit adding an offset) and the size of the low 32 bit of the value. This removes our need for linux/irqchip/mips-gic.h & prepares us for it to be removed. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17047/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 277d5be03a57..6e6c9adea049 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -11,8 +11,4 @@ #include #include -/* User Mode Visible Section Register Map */ -#define GIC_UMV_SH_COUNTER_31_00_OFS 0x0000 -#define GIC_UMV_SH_COUNTER_63_32_OFS 0x0004 - #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From 1fad12cd5ef43755d028f760fa089c87f5533451 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 12 Aug 2017 21:36:39 -0700 Subject: irqchip: mips-gic: Remove linux/irqchip/mips-gic.h The linux/irqchip/mips-gic.h header is no longer used. Remove it. Signed-off-by: Paul Burton Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17049/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/irqchip/mips-gic.h (limited to 'include') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h deleted file mode 100644 index 6e6c9adea049..000000000000 --- a/include/linux/irqchip/mips-gic.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000, 07 MIPS Technologies, Inc. - */ -#ifndef __LINUX_IRQCHIP_MIPS_GIC_H -#define __LINUX_IRQCHIP_MIPS_GIC_H - -#include -#include - -#endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit From a205425658dead19bb1b8ac00584aed98e60dde2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Aug 2017 18:34:24 +0200 Subject: mfd: twl: Move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Greg Kroah-Hartman Acked-by: Alexandre Belloni Acked-by: Mark Brown Acked-by: Sebastian Reichel Acked-by: Jonathan Cameron Acked-by: Dmitry Torokhov Acked-by: Kishon Vijay Abraham I Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Thierry Reding Acked-by: Tony Lindgren Acked-by: Daniel Thompson Acked-by: Linus Walleij Acked-by: Guenter Roeck Signed-off-by: Lee Jones --- include/linux/i2c/twl.h | 876 ------------------------------------------------ include/linux/mfd/twl.h | 876 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 876 insertions(+), 876 deletions(-) delete mode 100644 include/linux/i2c/twl.h create mode 100644 include/linux/mfd/twl.h (limited to 'include') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h deleted file mode 100644 index 9ad7828d9d34..000000000000 --- a/include/linux/i2c/twl.h +++ /dev/null @@ -1,876 +0,0 @@ -/* - * twl4030.h - header for TWL4030 PM and audio CODEC device - * - * Copyright (C) 2005-2006 Texas Instruments, Inc. - * - * Based on tlv320aic23.c: - * Copyright (c) by Kai Svahn - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef __TWL_H_ -#define __TWL_H_ - -#include -#include - -/* - * Using the twl4030 core we address registers using a pair - * { module id, relative register offset } - * which that core then maps to the relevant - * { i2c slave, absolute register address } - * - * The module IDs are meaningful only to the twl4030 core code, - * which uses them as array indices to look up the first register - * address each module uses within a given i2c slave. - */ - -/* Module IDs for similar functionalities found in twl4030/twl6030 */ -enum twl_module_ids { - TWL_MODULE_USB, - TWL_MODULE_PIH, - TWL_MODULE_MAIN_CHARGE, - TWL_MODULE_PM_MASTER, - TWL_MODULE_PM_RECEIVER, - - TWL_MODULE_RTC, - TWL_MODULE_PWM, - TWL_MODULE_LED, - TWL_MODULE_SECURED_REG, - - TWL_MODULE_LAST, -}; - -/* Modules only available in twl4030 series */ -enum twl4030_module_ids { - TWL4030_MODULE_AUDIO_VOICE = TWL_MODULE_LAST, - TWL4030_MODULE_GPIO, - TWL4030_MODULE_INTBR, - TWL4030_MODULE_TEST, - TWL4030_MODULE_KEYPAD, - - TWL4030_MODULE_MADC, - TWL4030_MODULE_INTERRUPTS, - TWL4030_MODULE_PRECHARGE, - TWL4030_MODULE_BACKUP, - TWL4030_MODULE_INT, - - TWL5031_MODULE_ACCESSORY, - TWL5031_MODULE_INTERRUPTS, - - TWL4030_MODULE_LAST, -}; - -/* Modules only available in twl6030 series */ -enum twl6030_module_ids { - TWL6030_MODULE_ID0 = TWL_MODULE_LAST, - TWL6030_MODULE_ID1, - TWL6030_MODULE_ID2, - TWL6030_MODULE_GPADC, - TWL6030_MODULE_GASGAUGE, - - TWL6030_MODULE_LAST, -}; - -/* Until the clients has been converted to use TWL_MODULE_LED */ -#define TWL4030_MODULE_LED TWL_MODULE_LED - -#define GPIO_INTR_OFFSET 0 -#define KEYPAD_INTR_OFFSET 1 -#define BCI_INTR_OFFSET 2 -#define MADC_INTR_OFFSET 3 -#define USB_INTR_OFFSET 4 -#define CHARGERFAULT_INTR_OFFSET 5 -#define BCI_PRES_INTR_OFFSET 9 -#define USB_PRES_INTR_OFFSET 10 -#define RTC_INTR_OFFSET 11 - -/* - * Offset from TWL6030_IRQ_BASE / pdata->irq_base - */ -#define PWR_INTR_OFFSET 0 -#define HOTDIE_INTR_OFFSET 12 -#define SMPSLDO_INTR_OFFSET 13 -#define BATDETECT_INTR_OFFSET 14 -#define SIMDETECT_INTR_OFFSET 15 -#define MMCDETECT_INTR_OFFSET 16 -#define GASGAUGE_INTR_OFFSET 17 -#define USBOTG_INTR_OFFSET 4 -#define CHARGER_INTR_OFFSET 2 -#define RSV_INTR_OFFSET 0 - -/* INT register offsets */ -#define REG_INT_STS_A 0x00 -#define REG_INT_STS_B 0x01 -#define REG_INT_STS_C 0x02 - -#define REG_INT_MSK_LINE_A 0x03 -#define REG_INT_MSK_LINE_B 0x04 -#define REG_INT_MSK_LINE_C 0x05 - -#define REG_INT_MSK_STS_A 0x06 -#define REG_INT_MSK_STS_B 0x07 -#define REG_INT_MSK_STS_C 0x08 - -/* MASK INT REG GROUP A */ -#define TWL6030_PWR_INT_MASK 0x07 -#define TWL6030_RTC_INT_MASK 0x18 -#define TWL6030_HOTDIE_INT_MASK 0x20 -#define TWL6030_SMPSLDOA_INT_MASK 0xC0 - -/* MASK INT REG GROUP B */ -#define TWL6030_SMPSLDOB_INT_MASK 0x01 -#define TWL6030_BATDETECT_INT_MASK 0x02 -#define TWL6030_SIMDETECT_INT_MASK 0x04 -#define TWL6030_MMCDETECT_INT_MASK 0x08 -#define TWL6030_GPADC_INT_MASK 0x60 -#define TWL6030_GASGAUGE_INT_MASK 0x80 - -/* MASK INT REG GROUP C */ -#define TWL6030_USBOTG_INT_MASK 0x0F -#define TWL6030_CHARGER_CTRL_INT_MASK 0x10 -#define TWL6030_CHARGER_FAULT_INT_MASK 0x60 - -#define TWL6030_MMCCTRL 0xEE -#define VMMC_AUTO_OFF (0x1 << 3) -#define SW_FC (0x1 << 2) -#define STS_MMC 0x1 - -#define TWL6030_CFG_INPUT_PUPD3 0xF2 -#define MMC_PU (0x1 << 3) -#define MMC_PD (0x1 << 2) - -#define TWL_SIL_TYPE(rev) ((rev) & 0x00FFFFFF) -#define TWL_SIL_REV(rev) ((rev) >> 24) -#define TWL_SIL_5030 0x09002F -#define TWL5030_REV_1_0 0x00 -#define TWL5030_REV_1_1 0x10 -#define TWL5030_REV_1_2 0x30 - -#define TWL4030_CLASS_ID 0x4030 -#define TWL6030_CLASS_ID 0x6030 -unsigned int twl_rev(void); -#define GET_TWL_REV (twl_rev()) -#define TWL_CLASS_IS(class, id) \ -static inline int twl_class_is_ ##class(void) \ -{ \ - return ((id) == (GET_TWL_REV)) ? 1 : 0; \ -} - -TWL_CLASS_IS(4030, TWL4030_CLASS_ID) -TWL_CLASS_IS(6030, TWL6030_CLASS_ID) - -/* Set the regcache bypass for the regmap associated with the nodule */ -int twl_set_regcache_bypass(u8 mod_no, bool enable); - -/* - * Read and write several 8-bit registers at once. - */ -int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); -int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); - -/* - * Read and write single 8-bit registers - */ -static inline int twl_i2c_write_u8(u8 mod_no, u8 val, u8 reg) { - return twl_i2c_write(mod_no, &val, reg, 1); -} - -static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) { - return twl_i2c_read(mod_no, val, reg, 1); -} - -static inline int twl_i2c_write_u16(u8 mod_no, u16 val, u8 reg) { - val = cpu_to_le16(val); - return twl_i2c_write(mod_no, (u8*) &val, reg, 2); -} - -static inline int twl_i2c_read_u16(u8 mod_no, u16 *val, u8 reg) { - int ret; - ret = twl_i2c_read(mod_no, (u8*) val, reg, 2); - *val = le16_to_cpu(*val); - return ret; -} - -int twl_get_type(void); -int twl_get_version(void); -int twl_get_hfclk_rate(void); - -int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); -int twl6030_interrupt_mask(u8 bit_mask, u8 offset); - -/* Card detect Configuration for MMC1 Controller on OMAP4 */ -#ifdef CONFIG_TWL4030_CORE -int twl6030_mmc_card_detect_config(void); -#else -static inline int twl6030_mmc_card_detect_config(void) -{ - pr_debug("twl6030_mmc_card_detect_config not supported\n"); - return 0; -} -#endif - -/* MMC1 Controller on OMAP4 uses Phoenix irq for Card detect */ -#ifdef CONFIG_TWL4030_CORE -int twl6030_mmc_card_detect(struct device *dev, int slot); -#else -static inline int twl6030_mmc_card_detect(struct device *dev, int slot) -{ - pr_debug("Call back twl6030_mmc_card_detect not supported\n"); - return -EIO; -} -#endif -/*----------------------------------------------------------------------*/ - -/* - * NOTE: at up to 1024 registers, this is a big chip. - * - * Avoid putting register declarations in this file, instead of into - * a driver-private file, unless some of the registers in a block - * need to be shared with other drivers. One example is blocks that - * have Secondary IRQ Handler (SIH) registers. - */ - -#define TWL4030_SIH_CTRL_EXCLEN_MASK BIT(0) -#define TWL4030_SIH_CTRL_PENDDIS_MASK BIT(1) -#define TWL4030_SIH_CTRL_COR_MASK BIT(2) - -/*----------------------------------------------------------------------*/ - -/* - * GPIO Block Register offsets (use TWL4030_MODULE_GPIO) - */ - -#define REG_GPIODATAIN1 0x0 -#define REG_GPIODATAIN2 0x1 -#define REG_GPIODATAIN3 0x2 -#define REG_GPIODATADIR1 0x3 -#define REG_GPIODATADIR2 0x4 -#define REG_GPIODATADIR3 0x5 -#define REG_GPIODATAOUT1 0x6 -#define REG_GPIODATAOUT2 0x7 -#define REG_GPIODATAOUT3 0x8 -#define REG_CLEARGPIODATAOUT1 0x9 -#define REG_CLEARGPIODATAOUT2 0xA -#define REG_CLEARGPIODATAOUT3 0xB -#define REG_SETGPIODATAOUT1 0xC -#define REG_SETGPIODATAOUT2 0xD -#define REG_SETGPIODATAOUT3 0xE -#define REG_GPIO_DEBEN1 0xF -#define REG_GPIO_DEBEN2 0x10 -#define REG_GPIO_DEBEN3 0x11 -#define REG_GPIO_CTRL 0x12 -#define REG_GPIOPUPDCTR1 0x13 -#define REG_GPIOPUPDCTR2 0x14 -#define REG_GPIOPUPDCTR3 0x15 -#define REG_GPIOPUPDCTR4 0x16 -#define REG_GPIOPUPDCTR5 0x17 -#define REG_GPIO_ISR1A 0x19 -#define REG_GPIO_ISR2A 0x1A -#define REG_GPIO_ISR3A 0x1B -#define REG_GPIO_IMR1A 0x1C -#define REG_GPIO_IMR2A 0x1D -#define REG_GPIO_IMR3A 0x1E -#define REG_GPIO_ISR1B 0x1F -#define REG_GPIO_ISR2B 0x20 -#define REG_GPIO_ISR3B 0x21 -#define REG_GPIO_IMR1B 0x22 -#define REG_GPIO_IMR2B 0x23 -#define REG_GPIO_IMR3B 0x24 -#define REG_GPIO_EDR1 0x28 -#define REG_GPIO_EDR2 0x29 -#define REG_GPIO_EDR3 0x2A -#define REG_GPIO_EDR4 0x2B -#define REG_GPIO_EDR5 0x2C -#define REG_GPIO_SIH_CTRL 0x2D - -/* Up to 18 signals are available as GPIOs, when their - * pins are not assigned to another use (such as ULPI/USB). - */ -#define TWL4030_GPIO_MAX 18 - -/*----------------------------------------------------------------------*/ - -/*Interface Bit Register (INTBR) offsets - *(Use TWL_4030_MODULE_INTBR) - */ - -#define REG_IDCODE_7_0 0x00 -#define REG_IDCODE_15_8 0x01 -#define REG_IDCODE_16_23 0x02 -#define REG_IDCODE_31_24 0x03 -#define REG_GPPUPDCTR1 0x0F -#define REG_UNLOCK_TEST_REG 0x12 - -/*I2C1 and I2C4(SR) SDA/SCL pull-up control bits */ - -#define I2C_SCL_CTRL_PU BIT(0) -#define I2C_SDA_CTRL_PU BIT(2) -#define SR_I2C_SCL_CTRL_PU BIT(4) -#define SR_I2C_SDA_CTRL_PU BIT(6) - -#define TWL_EEPROM_R_UNLOCK 0x49 - -/*----------------------------------------------------------------------*/ - -/* - * Keypad register offsets (use TWL4030_MODULE_KEYPAD) - * ... SIH/interrupt only - */ - -#define TWL4030_KEYPAD_KEYP_ISR1 0x11 -#define TWL4030_KEYPAD_KEYP_IMR1 0x12 -#define TWL4030_KEYPAD_KEYP_ISR2 0x13 -#define TWL4030_KEYPAD_KEYP_IMR2 0x14 -#define TWL4030_KEYPAD_KEYP_SIR 0x15 /* test register */ -#define TWL4030_KEYPAD_KEYP_EDR 0x16 -#define TWL4030_KEYPAD_KEYP_SIH_CTRL 0x17 - -/*----------------------------------------------------------------------*/ - -/* - * Multichannel ADC register offsets (use TWL4030_MODULE_MADC) - * ... SIH/interrupt only - */ - -#define TWL4030_MADC_ISR1 0x61 -#define TWL4030_MADC_IMR1 0x62 -#define TWL4030_MADC_ISR2 0x63 -#define TWL4030_MADC_IMR2 0x64 -#define TWL4030_MADC_SIR 0x65 /* test register */ -#define TWL4030_MADC_EDR 0x66 -#define TWL4030_MADC_SIH_CTRL 0x67 - -/*----------------------------------------------------------------------*/ - -/* - * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS) - */ - -#define TWL4030_INTERRUPTS_BCIISR1A 0x0 -#define TWL4030_INTERRUPTS_BCIISR2A 0x1 -#define TWL4030_INTERRUPTS_BCIIMR1A 0x2 -#define TWL4030_INTERRUPTS_BCIIMR2A 0x3 -#define TWL4030_INTERRUPTS_BCIISR1B 0x4 -#define TWL4030_INTERRUPTS_BCIISR2B 0x5 -#define TWL4030_INTERRUPTS_BCIIMR1B 0x6 -#define TWL4030_INTERRUPTS_BCIIMR2B 0x7 -#define TWL4030_INTERRUPTS_BCISIR1 0x8 /* test register */ -#define TWL4030_INTERRUPTS_BCISIR2 0x9 /* test register */ -#define TWL4030_INTERRUPTS_BCIEDR1 0xa -#define TWL4030_INTERRUPTS_BCIEDR2 0xb -#define TWL4030_INTERRUPTS_BCIEDR3 0xc -#define TWL4030_INTERRUPTS_BCISIHCTRL 0xd - -/*----------------------------------------------------------------------*/ - -/* - * Power Interrupt block register offsets (use TWL4030_MODULE_INT) - */ - -#define TWL4030_INT_PWR_ISR1 0x0 -#define TWL4030_INT_PWR_IMR1 0x1 -#define TWL4030_INT_PWR_ISR2 0x2 -#define TWL4030_INT_PWR_IMR2 0x3 -#define TWL4030_INT_PWR_SIR 0x4 /* test register */ -#define TWL4030_INT_PWR_EDR1 0x5 -#define TWL4030_INT_PWR_EDR2 0x6 -#define TWL4030_INT_PWR_SIH_CTRL 0x7 - -/*----------------------------------------------------------------------*/ - -/* - * Accessory Interrupts - */ -#define TWL5031_ACIIMR_LSB 0x05 -#define TWL5031_ACIIMR_MSB 0x06 -#define TWL5031_ACIIDR_LSB 0x07 -#define TWL5031_ACIIDR_MSB 0x08 -#define TWL5031_ACCISR1 0x0F -#define TWL5031_ACCIMR1 0x10 -#define TWL5031_ACCISR2 0x11 -#define TWL5031_ACCIMR2 0x12 -#define TWL5031_ACCSIR 0x13 -#define TWL5031_ACCEDR1 0x14 -#define TWL5031_ACCSIHCTRL 0x15 - -/*----------------------------------------------------------------------*/ - -/* - * Battery Charger Controller - */ - -#define TWL5031_INTERRUPTS_BCIISR1 0x0 -#define TWL5031_INTERRUPTS_BCIIMR1 0x1 -#define TWL5031_INTERRUPTS_BCIISR2 0x2 -#define TWL5031_INTERRUPTS_BCIIMR2 0x3 -#define TWL5031_INTERRUPTS_BCISIR 0x4 -#define TWL5031_INTERRUPTS_BCIEDR1 0x5 -#define TWL5031_INTERRUPTS_BCIEDR2 0x6 -#define TWL5031_INTERRUPTS_BCISIHCTRL 0x7 - -/*----------------------------------------------------------------------*/ - -/* - * PM Master module register offsets (use TWL4030_MODULE_PM_MASTER) - */ - -#define TWL4030_PM_MASTER_CFG_P1_TRANSITION 0x00 -#define TWL4030_PM_MASTER_CFG_P2_TRANSITION 0x01 -#define TWL4030_PM_MASTER_CFG_P3_TRANSITION 0x02 -#define TWL4030_PM_MASTER_CFG_P123_TRANSITION 0x03 -#define TWL4030_PM_MASTER_STS_BOOT 0x04 -#define TWL4030_PM_MASTER_CFG_BOOT 0x05 -#define TWL4030_PM_MASTER_SHUNDAN 0x06 -#define TWL4030_PM_MASTER_BOOT_BCI 0x07 -#define TWL4030_PM_MASTER_CFG_PWRANA1 0x08 -#define TWL4030_PM_MASTER_CFG_PWRANA2 0x09 -#define TWL4030_PM_MASTER_BACKUP_MISC_STS 0x0b -#define TWL4030_PM_MASTER_BACKUP_MISC_CFG 0x0c -#define TWL4030_PM_MASTER_BACKUP_MISC_TST 0x0d -#define TWL4030_PM_MASTER_PROTECT_KEY 0x0e -#define TWL4030_PM_MASTER_STS_HW_CONDITIONS 0x0f -#define TWL4030_PM_MASTER_P1_SW_EVENTS 0x10 -#define TWL4030_PM_MASTER_P2_SW_EVENTS 0x11 -#define TWL4030_PM_MASTER_P3_SW_EVENTS 0x12 -#define TWL4030_PM_MASTER_STS_P123_STATE 0x13 -#define TWL4030_PM_MASTER_PB_CFG 0x14 -#define TWL4030_PM_MASTER_PB_WORD_MSB 0x15 -#define TWL4030_PM_MASTER_PB_WORD_LSB 0x16 -#define TWL4030_PM_MASTER_SEQ_ADD_W2P 0x1c -#define TWL4030_PM_MASTER_SEQ_ADD_P2A 0x1d -#define TWL4030_PM_MASTER_SEQ_ADD_A2W 0x1e -#define TWL4030_PM_MASTER_SEQ_ADD_A2S 0x1f -#define TWL4030_PM_MASTER_SEQ_ADD_S2A12 0x20 -#define TWL4030_PM_MASTER_SEQ_ADD_S2A3 0x21 -#define TWL4030_PM_MASTER_SEQ_ADD_WARM 0x22 -#define TWL4030_PM_MASTER_MEMORY_ADDRESS 0x23 -#define TWL4030_PM_MASTER_MEMORY_DATA 0x24 - -#define TWL4030_PM_MASTER_KEY_CFG1 0xc0 -#define TWL4030_PM_MASTER_KEY_CFG2 0x0c - -#define TWL4030_PM_MASTER_KEY_TST1 0xe0 -#define TWL4030_PM_MASTER_KEY_TST2 0x0e - -#define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 - -/*----------------------------------------------------------------------*/ - -/* Power bus message definitions */ - -/* The TWL4030/5030 splits its power-management resources (the various - * regulators, clock and reset lines) into 3 processor groups - P1, P2 and - * P3. These groups can then be configured to transition between sleep, wait-on - * and active states by sending messages to the power bus. See Section 5.4.2 - * Power Resources of TWL4030 TRM - */ - -/* Processor groups */ -#define DEV_GRP_NULL 0x0 -#define DEV_GRP_P1 0x1 /* P1: all OMAP devices */ -#define DEV_GRP_P2 0x2 /* P2: all Modem devices */ -#define DEV_GRP_P3 0x4 /* P3: all peripheral devices */ - -/* Resource groups */ -#define RES_GRP_RES 0x0 /* Reserved */ -#define RES_GRP_PP 0x1 /* Power providers */ -#define RES_GRP_RC 0x2 /* Reset and control */ -#define RES_GRP_PP_RC 0x3 -#define RES_GRP_PR 0x4 /* Power references */ -#define RES_GRP_PP_PR 0x5 -#define RES_GRP_RC_PR 0x6 -#define RES_GRP_ALL 0x7 /* All resource groups */ - -#define RES_TYPE2_R0 0x0 -#define RES_TYPE2_R1 0x1 -#define RES_TYPE2_R2 0x2 - -#define RES_TYPE_R0 0x0 -#define RES_TYPE_ALL 0x7 - -/* Resource states */ -#define RES_STATE_WRST 0xF -#define RES_STATE_ACTIVE 0xE -#define RES_STATE_SLEEP 0x8 -#define RES_STATE_OFF 0x0 - -/* Power resources */ - -/* Power providers */ -#define RES_VAUX1 1 -#define RES_VAUX2 2 -#define RES_VAUX3 3 -#define RES_VAUX4 4 -#define RES_VMMC1 5 -#define RES_VMMC2 6 -#define RES_VPLL1 7 -#define RES_VPLL2 8 -#define RES_VSIM 9 -#define RES_VDAC 10 -#define RES_VINTANA1 11 -#define RES_VINTANA2 12 -#define RES_VINTDIG 13 -#define RES_VIO 14 -#define RES_VDD1 15 -#define RES_VDD2 16 -#define RES_VUSB_1V5 17 -#define RES_VUSB_1V8 18 -#define RES_VUSB_3V1 19 -#define RES_VUSBCP 20 -#define RES_REGEN 21 -/* Reset and control */ -#define RES_NRES_PWRON 22 -#define RES_CLKEN 23 -#define RES_SYSEN 24 -#define RES_HFCLKOUT 25 -#define RES_32KCLKOUT 26 -#define RES_RESET 27 -/* Power Reference */ -#define RES_MAIN_REF 28 - -#define TOTAL_RESOURCES 28 -/* - * Power Bus Message Format ... these can be sent individually by Linux, - * but are usually part of downloaded scripts that are run when various - * power events are triggered. - * - * Broadcast Message (16 Bits): - * DEV_GRP[15:13] MT[12] RES_GRP[11:9] RES_TYPE2[8:7] RES_TYPE[6:4] - * RES_STATE[3:0] - * - * Singular Message (16 Bits): - * DEV_GRP[15:13] MT[12] RES_ID[11:4] RES_STATE[3:0] - */ - -#define MSG_BROADCAST(devgrp, grp, type, type2, state) \ - ( (devgrp) << 13 | 1 << 12 | (grp) << 9 | (type2) << 7 \ - | (type) << 4 | (state)) - -#define MSG_SINGULAR(devgrp, id, state) \ - ((devgrp) << 13 | 0 << 12 | (id) << 4 | (state)) - -#define MSG_BROADCAST_ALL(devgrp, state) \ - ((devgrp) << 5 | (state)) - -#define MSG_BROADCAST_REF MSG_BROADCAST_ALL -#define MSG_BROADCAST_PROV MSG_BROADCAST_ALL -#define MSG_BROADCAST__CLK_RST MSG_BROADCAST_ALL -/*----------------------------------------------------------------------*/ - -struct twl4030_clock_init_data { - bool ck32k_lowpwr_enable; -}; - -struct twl4030_bci_platform_data { - int *battery_tmp_tbl; - unsigned int tblsize; - int bb_uvolt; /* voltage to charge backup battery */ - int bb_uamp; /* current for backup battery charging */ -}; - -/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */ -struct twl4030_gpio_platform_data { - /* package the two LED signals as output-only GPIOs? */ - bool use_leds; - - /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ - u8 mmc_cd; - - /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */ - u32 debounce; - - /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup - * should be enabled. Else, if that bit is set in "pulldowns", - * that pulldown is enabled. Don't waste power by letting any - * digital inputs float... - */ - u32 pullups; - u32 pulldowns; - - int (*setup)(struct device *dev, - unsigned gpio, unsigned ngpio); - int (*teardown)(struct device *dev, - unsigned gpio, unsigned ngpio); -}; - -struct twl4030_madc_platform_data { - int irq_line; -}; - -/* Boards have unique mappings of {row, col} --> keycode. - * Column and row are 8 bits each, but range only from 0..7. - * a PERSISTENT_KEY is "always on" and never reported. - */ -#define PERSISTENT_KEY(r, c) KEY((r), (c), KEY_RESERVED) - -struct twl4030_keypad_data { - const struct matrix_keymap_data *keymap_data; - unsigned rows; - unsigned cols; - bool rep; -}; - -enum twl4030_usb_mode { - T2_USB_MODE_ULPI = 1, - T2_USB_MODE_CEA2011_3PIN = 2, -}; - -struct twl4030_usb_data { - enum twl4030_usb_mode usb_mode; - unsigned long features; - - int (*phy_init)(struct device *dev); - int (*phy_exit)(struct device *dev); - /* Power on/off the PHY */ - int (*phy_power)(struct device *dev, int iD, int on); - /* enable/disable phy clocks */ - int (*phy_set_clock)(struct device *dev, int on); - /* suspend/resume of phy */ - int (*phy_suspend)(struct device *dev, int suspend); -}; - -struct twl4030_ins { - u16 pmb_message; - u8 delay; -}; - -struct twl4030_script { - struct twl4030_ins *script; - unsigned size; - u8 flags; -#define TWL4030_WRST_SCRIPT (1<<0) -#define TWL4030_WAKEUP12_SCRIPT (1<<1) -#define TWL4030_WAKEUP3_SCRIPT (1<<2) -#define TWL4030_SLEEP_SCRIPT (1<<3) -}; - -struct twl4030_resconfig { - u8 resource; - u8 devgroup; /* Processor group that Power resource belongs to */ - u8 type; /* Power resource addressed, 6 / broadcast message */ - u8 type2; /* Power resource addressed, 3 / broadcast message */ - u8 remap_off; /* off state remapping */ - u8 remap_sleep; /* sleep state remapping */ -}; - -struct twl4030_power_data { - struct twl4030_script **scripts; - unsigned num; - struct twl4030_resconfig *resource_config; - struct twl4030_resconfig *board_config; -#define TWL4030_RESCONFIG_UNDEF ((u8)-1) - bool use_poweroff; /* Board is wired for TWL poweroff */ - bool ac_charger_quirk; /* Disable AC charger on board */ -}; - -extern int twl4030_remove_script(u8 flags); -extern void twl4030_power_off(void); - -struct twl4030_codec_data { - unsigned int digimic_delay; /* in ms */ - unsigned int ramp_delay_value; - unsigned int offset_cncl_path; - unsigned int hs_extmute:1; - int hs_extmute_gpio; -}; - -struct twl4030_vibra_data { - unsigned int coexist; -}; - -struct twl4030_audio_data { - unsigned int audio_mclk; - struct twl4030_codec_data *codec; - struct twl4030_vibra_data *vibra; - - /* twl6040 */ - int audpwron_gpio; /* audio power-on gpio */ - int naudint_irq; /* audio interrupt */ - unsigned int irq_base; -}; - -struct twl4030_platform_data { - struct twl4030_clock_init_data *clock; - struct twl4030_bci_platform_data *bci; - struct twl4030_gpio_platform_data *gpio; - struct twl4030_madc_platform_data *madc; - struct twl4030_keypad_data *keypad; - struct twl4030_usb_data *usb; - struct twl4030_power_data *power; - struct twl4030_audio_data *audio; - - /* Common LDO regulators for TWL4030/TWL6030 */ - struct regulator_init_data *vdac; - struct regulator_init_data *vaux1; - struct regulator_init_data *vaux2; - struct regulator_init_data *vaux3; - struct regulator_init_data *vdd1; - struct regulator_init_data *vdd2; - struct regulator_init_data *vdd3; - /* TWL4030 LDO regulators */ - struct regulator_init_data *vpll1; - struct regulator_init_data *vpll2; - struct regulator_init_data *vmmc1; - struct regulator_init_data *vmmc2; - struct regulator_init_data *vsim; - struct regulator_init_data *vaux4; - struct regulator_init_data *vio; - struct regulator_init_data *vintana1; - struct regulator_init_data *vintana2; - struct regulator_init_data *vintdig; - /* TWL6030 LDO regulators */ - struct regulator_init_data *vmmc; - struct regulator_init_data *vpp; - struct regulator_init_data *vusim; - struct regulator_init_data *vana; - struct regulator_init_data *vcxio; - struct regulator_init_data *vusb; - struct regulator_init_data *clk32kg; - struct regulator_init_data *v1v8; - struct regulator_init_data *v2v1; - /* TWL6032 LDO regulators */ - struct regulator_init_data *ldo1; - struct regulator_init_data *ldo2; - struct regulator_init_data *ldo3; - struct regulator_init_data *ldo4; - struct regulator_init_data *ldo5; - struct regulator_init_data *ldo6; - struct regulator_init_data *ldo7; - struct regulator_init_data *ldoln; - struct regulator_init_data *ldousb; - /* TWL6032 DCDC regulators */ - struct regulator_init_data *smps3; - struct regulator_init_data *smps4; - struct regulator_init_data *vio6025; -}; - -struct twl_regulator_driver_data { - int (*set_voltage)(void *data, int target_uV); - int (*get_voltage)(void *data); - void *data; - unsigned long features; -}; -/* chip-specific feature flags, for twl_regulator_driver_data.features */ -#define TWL4030_VAUX2 BIT(0) /* pre-5030 voltage ranges */ -#define TPS_SUBSET BIT(1) /* tps659[23]0 have fewer LDOs */ -#define TWL5031 BIT(2) /* twl5031 has different registers */ -#define TWL6030_CLASS BIT(3) /* TWL6030 class */ -#define TWL6032_SUBCLASS BIT(4) /* TWL6032 has changed registers */ -#define TWL4030_ALLOW_UNSUPPORTED BIT(5) /* Some voltages are possible - * but not officially supported. - * This flag is necessary to - * enable them. - */ - -/*----------------------------------------------------------------------*/ - -int twl4030_sih_setup(struct device *dev, int module, int irq_base); - -/* Offsets to Power Registers */ -#define TWL4030_VDAC_DEV_GRP 0x3B -#define TWL4030_VDAC_DEDICATED 0x3E -#define TWL4030_VAUX1_DEV_GRP 0x17 -#define TWL4030_VAUX1_DEDICATED 0x1A -#define TWL4030_VAUX2_DEV_GRP 0x1B -#define TWL4030_VAUX2_DEDICATED 0x1E -#define TWL4030_VAUX3_DEV_GRP 0x1F -#define TWL4030_VAUX3_DEDICATED 0x22 - -static inline int twl4030charger_usb_en(int enable) { return 0; } - -/*----------------------------------------------------------------------*/ - -/* Linux-specific regulator identifiers ... for now, we only support - * the LDOs, and leave the three buck converters alone. VDD1 and VDD2 - * need to tie into hardware based voltage scaling (cpufreq etc), while - * VIO is generally fixed. - */ - -/* TWL4030 SMPS/LDO's */ -/* EXTERNAL dc-to-dc buck converters */ -#define TWL4030_REG_VDD1 0 -#define TWL4030_REG_VDD2 1 -#define TWL4030_REG_VIO 2 - -/* EXTERNAL LDOs */ -#define TWL4030_REG_VDAC 3 -#define TWL4030_REG_VPLL1 4 -#define TWL4030_REG_VPLL2 5 /* not on all chips */ -#define TWL4030_REG_VMMC1 6 -#define TWL4030_REG_VMMC2 7 /* not on all chips */ -#define TWL4030_REG_VSIM 8 /* not on all chips */ -#define TWL4030_REG_VAUX1 9 /* not on all chips */ -#define TWL4030_REG_VAUX2_4030 10 /* (twl4030-specific) */ -#define TWL4030_REG_VAUX2 11 /* (twl5030 and newer) */ -#define TWL4030_REG_VAUX3 12 /* not on all chips */ -#define TWL4030_REG_VAUX4 13 /* not on all chips */ - -/* INTERNAL LDOs */ -#define TWL4030_REG_VINTANA1 14 -#define TWL4030_REG_VINTANA2 15 -#define TWL4030_REG_VINTDIG 16 -#define TWL4030_REG_VUSB1V5 17 -#define TWL4030_REG_VUSB1V8 18 -#define TWL4030_REG_VUSB3V1 19 - -/* TWL6030 SMPS/LDO's */ -/* EXTERNAL dc-to-dc buck convertor controllable via SR */ -#define TWL6030_REG_VDD1 30 -#define TWL6030_REG_VDD2 31 -#define TWL6030_REG_VDD3 32 - -/* Non SR compliant dc-to-dc buck convertors */ -#define TWL6030_REG_VMEM 33 -#define TWL6030_REG_V2V1 34 -#define TWL6030_REG_V1V29 35 -#define TWL6030_REG_V1V8 36 - -/* EXTERNAL LDOs */ -#define TWL6030_REG_VAUX1_6030 37 -#define TWL6030_REG_VAUX2_6030 38 -#define TWL6030_REG_VAUX3_6030 39 -#define TWL6030_REG_VMMC 40 -#define TWL6030_REG_VPP 41 -#define TWL6030_REG_VUSIM 42 -#define TWL6030_REG_VANA 43 -#define TWL6030_REG_VCXIO 44 -#define TWL6030_REG_VDAC 45 -#define TWL6030_REG_VUSB 46 - -/* INTERNAL LDOs */ -#define TWL6030_REG_VRTC 47 -#define TWL6030_REG_CLK32KG 48 - -/* LDOs on 6025 have different names */ -#define TWL6032_REG_LDO2 49 -#define TWL6032_REG_LDO4 50 -#define TWL6032_REG_LDO3 51 -#define TWL6032_REG_LDO5 52 -#define TWL6032_REG_LDO1 53 -#define TWL6032_REG_LDO7 54 -#define TWL6032_REG_LDO6 55 -#define TWL6032_REG_LDOLN 56 -#define TWL6032_REG_LDOUSB 57 - -/* 6025 DCDC supplies */ -#define TWL6032_REG_SMPS3 58 -#define TWL6032_REG_SMPS4 59 -#define TWL6032_REG_VIO 60 - - -#endif /* End of __TWL4030_H */ diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h new file mode 100644 index 000000000000..9ad7828d9d34 --- /dev/null +++ b/include/linux/mfd/twl.h @@ -0,0 +1,876 @@ +/* + * twl4030.h - header for TWL4030 PM and audio CODEC device + * + * Copyright (C) 2005-2006 Texas Instruments, Inc. + * + * Based on tlv320aic23.c: + * Copyright (c) by Kai Svahn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __TWL_H_ +#define __TWL_H_ + +#include +#include + +/* + * Using the twl4030 core we address registers using a pair + * { module id, relative register offset } + * which that core then maps to the relevant + * { i2c slave, absolute register address } + * + * The module IDs are meaningful only to the twl4030 core code, + * which uses them as array indices to look up the first register + * address each module uses within a given i2c slave. + */ + +/* Module IDs for similar functionalities found in twl4030/twl6030 */ +enum twl_module_ids { + TWL_MODULE_USB, + TWL_MODULE_PIH, + TWL_MODULE_MAIN_CHARGE, + TWL_MODULE_PM_MASTER, + TWL_MODULE_PM_RECEIVER, + + TWL_MODULE_RTC, + TWL_MODULE_PWM, + TWL_MODULE_LED, + TWL_MODULE_SECURED_REG, + + TWL_MODULE_LAST, +}; + +/* Modules only available in twl4030 series */ +enum twl4030_module_ids { + TWL4030_MODULE_AUDIO_VOICE = TWL_MODULE_LAST, + TWL4030_MODULE_GPIO, + TWL4030_MODULE_INTBR, + TWL4030_MODULE_TEST, + TWL4030_MODULE_KEYPAD, + + TWL4030_MODULE_MADC, + TWL4030_MODULE_INTERRUPTS, + TWL4030_MODULE_PRECHARGE, + TWL4030_MODULE_BACKUP, + TWL4030_MODULE_INT, + + TWL5031_MODULE_ACCESSORY, + TWL5031_MODULE_INTERRUPTS, + + TWL4030_MODULE_LAST, +}; + +/* Modules only available in twl6030 series */ +enum twl6030_module_ids { + TWL6030_MODULE_ID0 = TWL_MODULE_LAST, + TWL6030_MODULE_ID1, + TWL6030_MODULE_ID2, + TWL6030_MODULE_GPADC, + TWL6030_MODULE_GASGAUGE, + + TWL6030_MODULE_LAST, +}; + +/* Until the clients has been converted to use TWL_MODULE_LED */ +#define TWL4030_MODULE_LED TWL_MODULE_LED + +#define GPIO_INTR_OFFSET 0 +#define KEYPAD_INTR_OFFSET 1 +#define BCI_INTR_OFFSET 2 +#define MADC_INTR_OFFSET 3 +#define USB_INTR_OFFSET 4 +#define CHARGERFAULT_INTR_OFFSET 5 +#define BCI_PRES_INTR_OFFSET 9 +#define USB_PRES_INTR_OFFSET 10 +#define RTC_INTR_OFFSET 11 + +/* + * Offset from TWL6030_IRQ_BASE / pdata->irq_base + */ +#define PWR_INTR_OFFSET 0 +#define HOTDIE_INTR_OFFSET 12 +#define SMPSLDO_INTR_OFFSET 13 +#define BATDETECT_INTR_OFFSET 14 +#define SIMDETECT_INTR_OFFSET 15 +#define MMCDETECT_INTR_OFFSET 16 +#define GASGAUGE_INTR_OFFSET 17 +#define USBOTG_INTR_OFFSET 4 +#define CHARGER_INTR_OFFSET 2 +#define RSV_INTR_OFFSET 0 + +/* INT register offsets */ +#define REG_INT_STS_A 0x00 +#define REG_INT_STS_B 0x01 +#define REG_INT_STS_C 0x02 + +#define REG_INT_MSK_LINE_A 0x03 +#define REG_INT_MSK_LINE_B 0x04 +#define REG_INT_MSK_LINE_C 0x05 + +#define REG_INT_MSK_STS_A 0x06 +#define REG_INT_MSK_STS_B 0x07 +#define REG_INT_MSK_STS_C 0x08 + +/* MASK INT REG GROUP A */ +#define TWL6030_PWR_INT_MASK 0x07 +#define TWL6030_RTC_INT_MASK 0x18 +#define TWL6030_HOTDIE_INT_MASK 0x20 +#define TWL6030_SMPSLDOA_INT_MASK 0xC0 + +/* MASK INT REG GROUP B */ +#define TWL6030_SMPSLDOB_INT_MASK 0x01 +#define TWL6030_BATDETECT_INT_MASK 0x02 +#define TWL6030_SIMDETECT_INT_MASK 0x04 +#define TWL6030_MMCDETECT_INT_MASK 0x08 +#define TWL6030_GPADC_INT_MASK 0x60 +#define TWL6030_GASGAUGE_INT_MASK 0x80 + +/* MASK INT REG GROUP C */ +#define TWL6030_USBOTG_INT_MASK 0x0F +#define TWL6030_CHARGER_CTRL_INT_MASK 0x10 +#define TWL6030_CHARGER_FAULT_INT_MASK 0x60 + +#define TWL6030_MMCCTRL 0xEE +#define VMMC_AUTO_OFF (0x1 << 3) +#define SW_FC (0x1 << 2) +#define STS_MMC 0x1 + +#define TWL6030_CFG_INPUT_PUPD3 0xF2 +#define MMC_PU (0x1 << 3) +#define MMC_PD (0x1 << 2) + +#define TWL_SIL_TYPE(rev) ((rev) & 0x00FFFFFF) +#define TWL_SIL_REV(rev) ((rev) >> 24) +#define TWL_SIL_5030 0x09002F +#define TWL5030_REV_1_0 0x00 +#define TWL5030_REV_1_1 0x10 +#define TWL5030_REV_1_2 0x30 + +#define TWL4030_CLASS_ID 0x4030 +#define TWL6030_CLASS_ID 0x6030 +unsigned int twl_rev(void); +#define GET_TWL_REV (twl_rev()) +#define TWL_CLASS_IS(class, id) \ +static inline int twl_class_is_ ##class(void) \ +{ \ + return ((id) == (GET_TWL_REV)) ? 1 : 0; \ +} + +TWL_CLASS_IS(4030, TWL4030_CLASS_ID) +TWL_CLASS_IS(6030, TWL6030_CLASS_ID) + +/* Set the regcache bypass for the regmap associated with the nodule */ +int twl_set_regcache_bypass(u8 mod_no, bool enable); + +/* + * Read and write several 8-bit registers at once. + */ +int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); +int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes); + +/* + * Read and write single 8-bit registers + */ +static inline int twl_i2c_write_u8(u8 mod_no, u8 val, u8 reg) { + return twl_i2c_write(mod_no, &val, reg, 1); +} + +static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) { + return twl_i2c_read(mod_no, val, reg, 1); +} + +static inline int twl_i2c_write_u16(u8 mod_no, u16 val, u8 reg) { + val = cpu_to_le16(val); + return twl_i2c_write(mod_no, (u8*) &val, reg, 2); +} + +static inline int twl_i2c_read_u16(u8 mod_no, u16 *val, u8 reg) { + int ret; + ret = twl_i2c_read(mod_no, (u8*) val, reg, 2); + *val = le16_to_cpu(*val); + return ret; +} + +int twl_get_type(void); +int twl_get_version(void); +int twl_get_hfclk_rate(void); + +int twl6030_interrupt_unmask(u8 bit_mask, u8 offset); +int twl6030_interrupt_mask(u8 bit_mask, u8 offset); + +/* Card detect Configuration for MMC1 Controller on OMAP4 */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect_config(void); +#else +static inline int twl6030_mmc_card_detect_config(void) +{ + pr_debug("twl6030_mmc_card_detect_config not supported\n"); + return 0; +} +#endif + +/* MMC1 Controller on OMAP4 uses Phoenix irq for Card detect */ +#ifdef CONFIG_TWL4030_CORE +int twl6030_mmc_card_detect(struct device *dev, int slot); +#else +static inline int twl6030_mmc_card_detect(struct device *dev, int slot) +{ + pr_debug("Call back twl6030_mmc_card_detect not supported\n"); + return -EIO; +} +#endif +/*----------------------------------------------------------------------*/ + +/* + * NOTE: at up to 1024 registers, this is a big chip. + * + * Avoid putting register declarations in this file, instead of into + * a driver-private file, unless some of the registers in a block + * need to be shared with other drivers. One example is blocks that + * have Secondary IRQ Handler (SIH) registers. + */ + +#define TWL4030_SIH_CTRL_EXCLEN_MASK BIT(0) +#define TWL4030_SIH_CTRL_PENDDIS_MASK BIT(1) +#define TWL4030_SIH_CTRL_COR_MASK BIT(2) + +/*----------------------------------------------------------------------*/ + +/* + * GPIO Block Register offsets (use TWL4030_MODULE_GPIO) + */ + +#define REG_GPIODATAIN1 0x0 +#define REG_GPIODATAIN2 0x1 +#define REG_GPIODATAIN3 0x2 +#define REG_GPIODATADIR1 0x3 +#define REG_GPIODATADIR2 0x4 +#define REG_GPIODATADIR3 0x5 +#define REG_GPIODATAOUT1 0x6 +#define REG_GPIODATAOUT2 0x7 +#define REG_GPIODATAOUT3 0x8 +#define REG_CLEARGPIODATAOUT1 0x9 +#define REG_CLEARGPIODATAOUT2 0xA +#define REG_CLEARGPIODATAOUT3 0xB +#define REG_SETGPIODATAOUT1 0xC +#define REG_SETGPIODATAOUT2 0xD +#define REG_SETGPIODATAOUT3 0xE +#define REG_GPIO_DEBEN1 0xF +#define REG_GPIO_DEBEN2 0x10 +#define REG_GPIO_DEBEN3 0x11 +#define REG_GPIO_CTRL 0x12 +#define REG_GPIOPUPDCTR1 0x13 +#define REG_GPIOPUPDCTR2 0x14 +#define REG_GPIOPUPDCTR3 0x15 +#define REG_GPIOPUPDCTR4 0x16 +#define REG_GPIOPUPDCTR5 0x17 +#define REG_GPIO_ISR1A 0x19 +#define REG_GPIO_ISR2A 0x1A +#define REG_GPIO_ISR3A 0x1B +#define REG_GPIO_IMR1A 0x1C +#define REG_GPIO_IMR2A 0x1D +#define REG_GPIO_IMR3A 0x1E +#define REG_GPIO_ISR1B 0x1F +#define REG_GPIO_ISR2B 0x20 +#define REG_GPIO_ISR3B 0x21 +#define REG_GPIO_IMR1B 0x22 +#define REG_GPIO_IMR2B 0x23 +#define REG_GPIO_IMR3B 0x24 +#define REG_GPIO_EDR1 0x28 +#define REG_GPIO_EDR2 0x29 +#define REG_GPIO_EDR3 0x2A +#define REG_GPIO_EDR4 0x2B +#define REG_GPIO_EDR5 0x2C +#define REG_GPIO_SIH_CTRL 0x2D + +/* Up to 18 signals are available as GPIOs, when their + * pins are not assigned to another use (such as ULPI/USB). + */ +#define TWL4030_GPIO_MAX 18 + +/*----------------------------------------------------------------------*/ + +/*Interface Bit Register (INTBR) offsets + *(Use TWL_4030_MODULE_INTBR) + */ + +#define REG_IDCODE_7_0 0x00 +#define REG_IDCODE_15_8 0x01 +#define REG_IDCODE_16_23 0x02 +#define REG_IDCODE_31_24 0x03 +#define REG_GPPUPDCTR1 0x0F +#define REG_UNLOCK_TEST_REG 0x12 + +/*I2C1 and I2C4(SR) SDA/SCL pull-up control bits */ + +#define I2C_SCL_CTRL_PU BIT(0) +#define I2C_SDA_CTRL_PU BIT(2) +#define SR_I2C_SCL_CTRL_PU BIT(4) +#define SR_I2C_SDA_CTRL_PU BIT(6) + +#define TWL_EEPROM_R_UNLOCK 0x49 + +/*----------------------------------------------------------------------*/ + +/* + * Keypad register offsets (use TWL4030_MODULE_KEYPAD) + * ... SIH/interrupt only + */ + +#define TWL4030_KEYPAD_KEYP_ISR1 0x11 +#define TWL4030_KEYPAD_KEYP_IMR1 0x12 +#define TWL4030_KEYPAD_KEYP_ISR2 0x13 +#define TWL4030_KEYPAD_KEYP_IMR2 0x14 +#define TWL4030_KEYPAD_KEYP_SIR 0x15 /* test register */ +#define TWL4030_KEYPAD_KEYP_EDR 0x16 +#define TWL4030_KEYPAD_KEYP_SIH_CTRL 0x17 + +/*----------------------------------------------------------------------*/ + +/* + * Multichannel ADC register offsets (use TWL4030_MODULE_MADC) + * ... SIH/interrupt only + */ + +#define TWL4030_MADC_ISR1 0x61 +#define TWL4030_MADC_IMR1 0x62 +#define TWL4030_MADC_ISR2 0x63 +#define TWL4030_MADC_IMR2 0x64 +#define TWL4030_MADC_SIR 0x65 /* test register */ +#define TWL4030_MADC_EDR 0x66 +#define TWL4030_MADC_SIH_CTRL 0x67 + +/*----------------------------------------------------------------------*/ + +/* + * Battery charger register offsets (use TWL4030_MODULE_INTERRUPTS) + */ + +#define TWL4030_INTERRUPTS_BCIISR1A 0x0 +#define TWL4030_INTERRUPTS_BCIISR2A 0x1 +#define TWL4030_INTERRUPTS_BCIIMR1A 0x2 +#define TWL4030_INTERRUPTS_BCIIMR2A 0x3 +#define TWL4030_INTERRUPTS_BCIISR1B 0x4 +#define TWL4030_INTERRUPTS_BCIISR2B 0x5 +#define TWL4030_INTERRUPTS_BCIIMR1B 0x6 +#define TWL4030_INTERRUPTS_BCIIMR2B 0x7 +#define TWL4030_INTERRUPTS_BCISIR1 0x8 /* test register */ +#define TWL4030_INTERRUPTS_BCISIR2 0x9 /* test register */ +#define TWL4030_INTERRUPTS_BCIEDR1 0xa +#define TWL4030_INTERRUPTS_BCIEDR2 0xb +#define TWL4030_INTERRUPTS_BCIEDR3 0xc +#define TWL4030_INTERRUPTS_BCISIHCTRL 0xd + +/*----------------------------------------------------------------------*/ + +/* + * Power Interrupt block register offsets (use TWL4030_MODULE_INT) + */ + +#define TWL4030_INT_PWR_ISR1 0x0 +#define TWL4030_INT_PWR_IMR1 0x1 +#define TWL4030_INT_PWR_ISR2 0x2 +#define TWL4030_INT_PWR_IMR2 0x3 +#define TWL4030_INT_PWR_SIR 0x4 /* test register */ +#define TWL4030_INT_PWR_EDR1 0x5 +#define TWL4030_INT_PWR_EDR2 0x6 +#define TWL4030_INT_PWR_SIH_CTRL 0x7 + +/*----------------------------------------------------------------------*/ + +/* + * Accessory Interrupts + */ +#define TWL5031_ACIIMR_LSB 0x05 +#define TWL5031_ACIIMR_MSB 0x06 +#define TWL5031_ACIIDR_LSB 0x07 +#define TWL5031_ACIIDR_MSB 0x08 +#define TWL5031_ACCISR1 0x0F +#define TWL5031_ACCIMR1 0x10 +#define TWL5031_ACCISR2 0x11 +#define TWL5031_ACCIMR2 0x12 +#define TWL5031_ACCSIR 0x13 +#define TWL5031_ACCEDR1 0x14 +#define TWL5031_ACCSIHCTRL 0x15 + +/*----------------------------------------------------------------------*/ + +/* + * Battery Charger Controller + */ + +#define TWL5031_INTERRUPTS_BCIISR1 0x0 +#define TWL5031_INTERRUPTS_BCIIMR1 0x1 +#define TWL5031_INTERRUPTS_BCIISR2 0x2 +#define TWL5031_INTERRUPTS_BCIIMR2 0x3 +#define TWL5031_INTERRUPTS_BCISIR 0x4 +#define TWL5031_INTERRUPTS_BCIEDR1 0x5 +#define TWL5031_INTERRUPTS_BCIEDR2 0x6 +#define TWL5031_INTERRUPTS_BCISIHCTRL 0x7 + +/*----------------------------------------------------------------------*/ + +/* + * PM Master module register offsets (use TWL4030_MODULE_PM_MASTER) + */ + +#define TWL4030_PM_MASTER_CFG_P1_TRANSITION 0x00 +#define TWL4030_PM_MASTER_CFG_P2_TRANSITION 0x01 +#define TWL4030_PM_MASTER_CFG_P3_TRANSITION 0x02 +#define TWL4030_PM_MASTER_CFG_P123_TRANSITION 0x03 +#define TWL4030_PM_MASTER_STS_BOOT 0x04 +#define TWL4030_PM_MASTER_CFG_BOOT 0x05 +#define TWL4030_PM_MASTER_SHUNDAN 0x06 +#define TWL4030_PM_MASTER_BOOT_BCI 0x07 +#define TWL4030_PM_MASTER_CFG_PWRANA1 0x08 +#define TWL4030_PM_MASTER_CFG_PWRANA2 0x09 +#define TWL4030_PM_MASTER_BACKUP_MISC_STS 0x0b +#define TWL4030_PM_MASTER_BACKUP_MISC_CFG 0x0c +#define TWL4030_PM_MASTER_BACKUP_MISC_TST 0x0d +#define TWL4030_PM_MASTER_PROTECT_KEY 0x0e +#define TWL4030_PM_MASTER_STS_HW_CONDITIONS 0x0f +#define TWL4030_PM_MASTER_P1_SW_EVENTS 0x10 +#define TWL4030_PM_MASTER_P2_SW_EVENTS 0x11 +#define TWL4030_PM_MASTER_P3_SW_EVENTS 0x12 +#define TWL4030_PM_MASTER_STS_P123_STATE 0x13 +#define TWL4030_PM_MASTER_PB_CFG 0x14 +#define TWL4030_PM_MASTER_PB_WORD_MSB 0x15 +#define TWL4030_PM_MASTER_PB_WORD_LSB 0x16 +#define TWL4030_PM_MASTER_SEQ_ADD_W2P 0x1c +#define TWL4030_PM_MASTER_SEQ_ADD_P2A 0x1d +#define TWL4030_PM_MASTER_SEQ_ADD_A2W 0x1e +#define TWL4030_PM_MASTER_SEQ_ADD_A2S 0x1f +#define TWL4030_PM_MASTER_SEQ_ADD_S2A12 0x20 +#define TWL4030_PM_MASTER_SEQ_ADD_S2A3 0x21 +#define TWL4030_PM_MASTER_SEQ_ADD_WARM 0x22 +#define TWL4030_PM_MASTER_MEMORY_ADDRESS 0x23 +#define TWL4030_PM_MASTER_MEMORY_DATA 0x24 + +#define TWL4030_PM_MASTER_KEY_CFG1 0xc0 +#define TWL4030_PM_MASTER_KEY_CFG2 0x0c + +#define TWL4030_PM_MASTER_KEY_TST1 0xe0 +#define TWL4030_PM_MASTER_KEY_TST2 0x0e + +#define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 + +/*----------------------------------------------------------------------*/ + +/* Power bus message definitions */ + +/* The TWL4030/5030 splits its power-management resources (the various + * regulators, clock and reset lines) into 3 processor groups - P1, P2 and + * P3. These groups can then be configured to transition between sleep, wait-on + * and active states by sending messages to the power bus. See Section 5.4.2 + * Power Resources of TWL4030 TRM + */ + +/* Processor groups */ +#define DEV_GRP_NULL 0x0 +#define DEV_GRP_P1 0x1 /* P1: all OMAP devices */ +#define DEV_GRP_P2 0x2 /* P2: all Modem devices */ +#define DEV_GRP_P3 0x4 /* P3: all peripheral devices */ + +/* Resource groups */ +#define RES_GRP_RES 0x0 /* Reserved */ +#define RES_GRP_PP 0x1 /* Power providers */ +#define RES_GRP_RC 0x2 /* Reset and control */ +#define RES_GRP_PP_RC 0x3 +#define RES_GRP_PR 0x4 /* Power references */ +#define RES_GRP_PP_PR 0x5 +#define RES_GRP_RC_PR 0x6 +#define RES_GRP_ALL 0x7 /* All resource groups */ + +#define RES_TYPE2_R0 0x0 +#define RES_TYPE2_R1 0x1 +#define RES_TYPE2_R2 0x2 + +#define RES_TYPE_R0 0x0 +#define RES_TYPE_ALL 0x7 + +/* Resource states */ +#define RES_STATE_WRST 0xF +#define RES_STATE_ACTIVE 0xE +#define RES_STATE_SLEEP 0x8 +#define RES_STATE_OFF 0x0 + +/* Power resources */ + +/* Power providers */ +#define RES_VAUX1 1 +#define RES_VAUX2 2 +#define RES_VAUX3 3 +#define RES_VAUX4 4 +#define RES_VMMC1 5 +#define RES_VMMC2 6 +#define RES_VPLL1 7 +#define RES_VPLL2 8 +#define RES_VSIM 9 +#define RES_VDAC 10 +#define RES_VINTANA1 11 +#define RES_VINTANA2 12 +#define RES_VINTDIG 13 +#define RES_VIO 14 +#define RES_VDD1 15 +#define RES_VDD2 16 +#define RES_VUSB_1V5 17 +#define RES_VUSB_1V8 18 +#define RES_VUSB_3V1 19 +#define RES_VUSBCP 20 +#define RES_REGEN 21 +/* Reset and control */ +#define RES_NRES_PWRON 22 +#define RES_CLKEN 23 +#define RES_SYSEN 24 +#define RES_HFCLKOUT 25 +#define RES_32KCLKOUT 26 +#define RES_RESET 27 +/* Power Reference */ +#define RES_MAIN_REF 28 + +#define TOTAL_RESOURCES 28 +/* + * Power Bus Message Format ... these can be sent individually by Linux, + * but are usually part of downloaded scripts that are run when various + * power events are triggered. + * + * Broadcast Message (16 Bits): + * DEV_GRP[15:13] MT[12] RES_GRP[11:9] RES_TYPE2[8:7] RES_TYPE[6:4] + * RES_STATE[3:0] + * + * Singular Message (16 Bits): + * DEV_GRP[15:13] MT[12] RES_ID[11:4] RES_STATE[3:0] + */ + +#define MSG_BROADCAST(devgrp, grp, type, type2, state) \ + ( (devgrp) << 13 | 1 << 12 | (grp) << 9 | (type2) << 7 \ + | (type) << 4 | (state)) + +#define MSG_SINGULAR(devgrp, id, state) \ + ((devgrp) << 13 | 0 << 12 | (id) << 4 | (state)) + +#define MSG_BROADCAST_ALL(devgrp, state) \ + ((devgrp) << 5 | (state)) + +#define MSG_BROADCAST_REF MSG_BROADCAST_ALL +#define MSG_BROADCAST_PROV MSG_BROADCAST_ALL +#define MSG_BROADCAST__CLK_RST MSG_BROADCAST_ALL +/*----------------------------------------------------------------------*/ + +struct twl4030_clock_init_data { + bool ck32k_lowpwr_enable; +}; + +struct twl4030_bci_platform_data { + int *battery_tmp_tbl; + unsigned int tblsize; + int bb_uvolt; /* voltage to charge backup battery */ + int bb_uamp; /* current for backup battery charging */ +}; + +/* TWL4030_GPIO_MAX (18) GPIOs, with interrupts */ +struct twl4030_gpio_platform_data { + /* package the two LED signals as output-only GPIOs? */ + bool use_leds; + + /* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */ + u8 mmc_cd; + + /* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */ + u32 debounce; + + /* For gpio-N, bit (1 << N) in "pullups" is set if that pullup + * should be enabled. Else, if that bit is set in "pulldowns", + * that pulldown is enabled. Don't waste power by letting any + * digital inputs float... + */ + u32 pullups; + u32 pulldowns; + + int (*setup)(struct device *dev, + unsigned gpio, unsigned ngpio); + int (*teardown)(struct device *dev, + unsigned gpio, unsigned ngpio); +}; + +struct twl4030_madc_platform_data { + int irq_line; +}; + +/* Boards have unique mappings of {row, col} --> keycode. + * Column and row are 8 bits each, but range only from 0..7. + * a PERSISTENT_KEY is "always on" and never reported. + */ +#define PERSISTENT_KEY(r, c) KEY((r), (c), KEY_RESERVED) + +struct twl4030_keypad_data { + const struct matrix_keymap_data *keymap_data; + unsigned rows; + unsigned cols; + bool rep; +}; + +enum twl4030_usb_mode { + T2_USB_MODE_ULPI = 1, + T2_USB_MODE_CEA2011_3PIN = 2, +}; + +struct twl4030_usb_data { + enum twl4030_usb_mode usb_mode; + unsigned long features; + + int (*phy_init)(struct device *dev); + int (*phy_exit)(struct device *dev); + /* Power on/off the PHY */ + int (*phy_power)(struct device *dev, int iD, int on); + /* enable/disable phy clocks */ + int (*phy_set_clock)(struct device *dev, int on); + /* suspend/resume of phy */ + int (*phy_suspend)(struct device *dev, int suspend); +}; + +struct twl4030_ins { + u16 pmb_message; + u8 delay; +}; + +struct twl4030_script { + struct twl4030_ins *script; + unsigned size; + u8 flags; +#define TWL4030_WRST_SCRIPT (1<<0) +#define TWL4030_WAKEUP12_SCRIPT (1<<1) +#define TWL4030_WAKEUP3_SCRIPT (1<<2) +#define TWL4030_SLEEP_SCRIPT (1<<3) +}; + +struct twl4030_resconfig { + u8 resource; + u8 devgroup; /* Processor group that Power resource belongs to */ + u8 type; /* Power resource addressed, 6 / broadcast message */ + u8 type2; /* Power resource addressed, 3 / broadcast message */ + u8 remap_off; /* off state remapping */ + u8 remap_sleep; /* sleep state remapping */ +}; + +struct twl4030_power_data { + struct twl4030_script **scripts; + unsigned num; + struct twl4030_resconfig *resource_config; + struct twl4030_resconfig *board_config; +#define TWL4030_RESCONFIG_UNDEF ((u8)-1) + bool use_poweroff; /* Board is wired for TWL poweroff */ + bool ac_charger_quirk; /* Disable AC charger on board */ +}; + +extern int twl4030_remove_script(u8 flags); +extern void twl4030_power_off(void); + +struct twl4030_codec_data { + unsigned int digimic_delay; /* in ms */ + unsigned int ramp_delay_value; + unsigned int offset_cncl_path; + unsigned int hs_extmute:1; + int hs_extmute_gpio; +}; + +struct twl4030_vibra_data { + unsigned int coexist; +}; + +struct twl4030_audio_data { + unsigned int audio_mclk; + struct twl4030_codec_data *codec; + struct twl4030_vibra_data *vibra; + + /* twl6040 */ + int audpwron_gpio; /* audio power-on gpio */ + int naudint_irq; /* audio interrupt */ + unsigned int irq_base; +}; + +struct twl4030_platform_data { + struct twl4030_clock_init_data *clock; + struct twl4030_bci_platform_data *bci; + struct twl4030_gpio_platform_data *gpio; + struct twl4030_madc_platform_data *madc; + struct twl4030_keypad_data *keypad; + struct twl4030_usb_data *usb; + struct twl4030_power_data *power; + struct twl4030_audio_data *audio; + + /* Common LDO regulators for TWL4030/TWL6030 */ + struct regulator_init_data *vdac; + struct regulator_init_data *vaux1; + struct regulator_init_data *vaux2; + struct regulator_init_data *vaux3; + struct regulator_init_data *vdd1; + struct regulator_init_data *vdd2; + struct regulator_init_data *vdd3; + /* TWL4030 LDO regulators */ + struct regulator_init_data *vpll1; + struct regulator_init_data *vpll2; + struct regulator_init_data *vmmc1; + struct regulator_init_data *vmmc2; + struct regulator_init_data *vsim; + struct regulator_init_data *vaux4; + struct regulator_init_data *vio; + struct regulator_init_data *vintana1; + struct regulator_init_data *vintana2; + struct regulator_init_data *vintdig; + /* TWL6030 LDO regulators */ + struct regulator_init_data *vmmc; + struct regulator_init_data *vpp; + struct regulator_init_data *vusim; + struct regulator_init_data *vana; + struct regulator_init_data *vcxio; + struct regulator_init_data *vusb; + struct regulator_init_data *clk32kg; + struct regulator_init_data *v1v8; + struct regulator_init_data *v2v1; + /* TWL6032 LDO regulators */ + struct regulator_init_data *ldo1; + struct regulator_init_data *ldo2; + struct regulator_init_data *ldo3; + struct regulator_init_data *ldo4; + struct regulator_init_data *ldo5; + struct regulator_init_data *ldo6; + struct regulator_init_data *ldo7; + struct regulator_init_data *ldoln; + struct regulator_init_data *ldousb; + /* TWL6032 DCDC regulators */ + struct regulator_init_data *smps3; + struct regulator_init_data *smps4; + struct regulator_init_data *vio6025; +}; + +struct twl_regulator_driver_data { + int (*set_voltage)(void *data, int target_uV); + int (*get_voltage)(void *data); + void *data; + unsigned long features; +}; +/* chip-specific feature flags, for twl_regulator_driver_data.features */ +#define TWL4030_VAUX2 BIT(0) /* pre-5030 voltage ranges */ +#define TPS_SUBSET BIT(1) /* tps659[23]0 have fewer LDOs */ +#define TWL5031 BIT(2) /* twl5031 has different registers */ +#define TWL6030_CLASS BIT(3) /* TWL6030 class */ +#define TWL6032_SUBCLASS BIT(4) /* TWL6032 has changed registers */ +#define TWL4030_ALLOW_UNSUPPORTED BIT(5) /* Some voltages are possible + * but not officially supported. + * This flag is necessary to + * enable them. + */ + +/*----------------------------------------------------------------------*/ + +int twl4030_sih_setup(struct device *dev, int module, int irq_base); + +/* Offsets to Power Registers */ +#define TWL4030_VDAC_DEV_GRP 0x3B +#define TWL4030_VDAC_DEDICATED 0x3E +#define TWL4030_VAUX1_DEV_GRP 0x17 +#define TWL4030_VAUX1_DEDICATED 0x1A +#define TWL4030_VAUX2_DEV_GRP 0x1B +#define TWL4030_VAUX2_DEDICATED 0x1E +#define TWL4030_VAUX3_DEV_GRP 0x1F +#define TWL4030_VAUX3_DEDICATED 0x22 + +static inline int twl4030charger_usb_en(int enable) { return 0; } + +/*----------------------------------------------------------------------*/ + +/* Linux-specific regulator identifiers ... for now, we only support + * the LDOs, and leave the three buck converters alone. VDD1 and VDD2 + * need to tie into hardware based voltage scaling (cpufreq etc), while + * VIO is generally fixed. + */ + +/* TWL4030 SMPS/LDO's */ +/* EXTERNAL dc-to-dc buck converters */ +#define TWL4030_REG_VDD1 0 +#define TWL4030_REG_VDD2 1 +#define TWL4030_REG_VIO 2 + +/* EXTERNAL LDOs */ +#define TWL4030_REG_VDAC 3 +#define TWL4030_REG_VPLL1 4 +#define TWL4030_REG_VPLL2 5 /* not on all chips */ +#define TWL4030_REG_VMMC1 6 +#define TWL4030_REG_VMMC2 7 /* not on all chips */ +#define TWL4030_REG_VSIM 8 /* not on all chips */ +#define TWL4030_REG_VAUX1 9 /* not on all chips */ +#define TWL4030_REG_VAUX2_4030 10 /* (twl4030-specific) */ +#define TWL4030_REG_VAUX2 11 /* (twl5030 and newer) */ +#define TWL4030_REG_VAUX3 12 /* not on all chips */ +#define TWL4030_REG_VAUX4 13 /* not on all chips */ + +/* INTERNAL LDOs */ +#define TWL4030_REG_VINTANA1 14 +#define TWL4030_REG_VINTANA2 15 +#define TWL4030_REG_VINTDIG 16 +#define TWL4030_REG_VUSB1V5 17 +#define TWL4030_REG_VUSB1V8 18 +#define TWL4030_REG_VUSB3V1 19 + +/* TWL6030 SMPS/LDO's */ +/* EXTERNAL dc-to-dc buck convertor controllable via SR */ +#define TWL6030_REG_VDD1 30 +#define TWL6030_REG_VDD2 31 +#define TWL6030_REG_VDD3 32 + +/* Non SR compliant dc-to-dc buck convertors */ +#define TWL6030_REG_VMEM 33 +#define TWL6030_REG_V2V1 34 +#define TWL6030_REG_V1V29 35 +#define TWL6030_REG_V1V8 36 + +/* EXTERNAL LDOs */ +#define TWL6030_REG_VAUX1_6030 37 +#define TWL6030_REG_VAUX2_6030 38 +#define TWL6030_REG_VAUX3_6030 39 +#define TWL6030_REG_VMMC 40 +#define TWL6030_REG_VPP 41 +#define TWL6030_REG_VUSIM 42 +#define TWL6030_REG_VANA 43 +#define TWL6030_REG_VCXIO 44 +#define TWL6030_REG_VDAC 45 +#define TWL6030_REG_VUSB 46 + +/* INTERNAL LDOs */ +#define TWL6030_REG_VRTC 47 +#define TWL6030_REG_CLK32KG 48 + +/* LDOs on 6025 have different names */ +#define TWL6032_REG_LDO2 49 +#define TWL6032_REG_LDO4 50 +#define TWL6032_REG_LDO3 51 +#define TWL6032_REG_LDO5 52 +#define TWL6032_REG_LDO1 53 +#define TWL6032_REG_LDO7 54 +#define TWL6032_REG_LDO6 55 +#define TWL6032_REG_LDOLN 56 +#define TWL6032_REG_LDOUSB 57 + +/* 6025 DCDC supplies */ +#define TWL6032_REG_SMPS3 58 +#define TWL6032_REG_SMPS4 59 +#define TWL6032_REG_VIO 60 + + +#endif /* End of __TWL4030_H */ -- cgit From e8924005b4e7964313536547d4b73406330be26d Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 28 Aug 2017 12:04:07 +0200 Subject: mfd: Add STM32 LPTimer driver STM32 Low-Power Timer hardware block can be used for: - PWM generation - IIO trigger (in sync with PWM) - IIO quadrature encoder counter PWM and IIO timer configuration are mixed in the same registers so we need a multi fonction driver to be able to share those registers. Signed-off-by: Fabrice Gasnier Signed-off-by: Lee Jones --- include/linux/mfd/stm32-lptimer.h | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/mfd/stm32-lptimer.h (limited to 'include') diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h new file mode 100644 index 000000000000..77c7cf40d9b4 --- /dev/null +++ b/include/linux/mfd/stm32-lptimer.h @@ -0,0 +1,62 @@ +/* + * STM32 Low-Power Timer parent driver. + * + * Copyright (C) STMicroelectronics 2017 + * + * Author: Fabrice Gasnier + * + * Inspired by Benjamin Gaignard's stm32-timers driver + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#ifndef _LINUX_STM32_LPTIMER_H_ +#define _LINUX_STM32_LPTIMER_H_ + +#include +#include + +#define STM32_LPTIM_ISR 0x00 /* Interrupt and Status Reg */ +#define STM32_LPTIM_ICR 0x04 /* Interrupt Clear Reg */ +#define STM32_LPTIM_IER 0x08 /* Interrupt Enable Reg */ +#define STM32_LPTIM_CFGR 0x0C /* Configuration Reg */ +#define STM32_LPTIM_CR 0x10 /* Control Reg */ +#define STM32_LPTIM_CMP 0x14 /* Compare Reg */ +#define STM32_LPTIM_ARR 0x18 /* Autoreload Reg */ +#define STM32_LPTIM_CNT 0x1C /* Counter Reg */ + +/* STM32_LPTIM_ISR - bit fields */ +#define STM32_LPTIM_CMPOK_ARROK GENMASK(4, 3) +#define STM32_LPTIM_ARROK BIT(4) +#define STM32_LPTIM_CMPOK BIT(3) + +/* STM32_LPTIM_ICR - bit fields */ +#define STM32_LPTIM_CMPOKCF_ARROKCF GENMASK(4, 3) + +/* STM32_LPTIM_CR - bit fields */ +#define STM32_LPTIM_CNTSTRT BIT(2) +#define STM32_LPTIM_ENABLE BIT(0) + +/* STM32_LPTIM_CFGR - bit fields */ +#define STM32_LPTIM_ENC BIT(24) +#define STM32_LPTIM_COUNTMODE BIT(23) +#define STM32_LPTIM_WAVPOL BIT(21) +#define STM32_LPTIM_PRESC GENMASK(11, 9) +#define STM32_LPTIM_CKPOL GENMASK(2, 1) + +/* STM32_LPTIM_ARR */ +#define STM32_LPTIM_MAX_ARR 0xFFFF + +/** + * struct stm32_lptimer - STM32 Low-Power Timer data assigned by parent device + * @clk: clock reference for this instance + * @regmap: register map reference for this instance + * @has_encoder: indicates this Low-Power Timer supports encoder mode + */ +struct stm32_lptimer { + struct clk *clk; + struct regmap *regmap; + bool has_encoder; +}; + +#endif -- cgit From b01ced2b504b2592af6703533c62cb9d1cdc1c6c Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 28 Aug 2017 12:04:11 +0200 Subject: iio: trigger: Add STM32 LPTimer trigger driver Add support for LPTIMx_OUT triggers that can be found on some STM32 devices. These triggers can be used then by ADC or DAC. Typical usage is to configure LPTimer as PWM output (via pwm-stm32-lp) and have synchronised analog conversions with these triggers. Signed-off-by: Fabrice Gasnier Reviewed-by: Jonathan Cameron Signed-off-by: Lee Jones --- include/linux/iio/timer/stm32-lptim-trigger.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/iio/timer/stm32-lptim-trigger.h (limited to 'include') diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h new file mode 100644 index 000000000000..34d59bfdce2d --- /dev/null +++ b/include/linux/iio/timer/stm32-lptim-trigger.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) STMicroelectronics 2017 + * + * Author: Fabrice Gasnier + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#ifndef _STM32_LPTIM_TRIGGER_H_ +#define _STM32_LPTIM_TRIGGER_H_ + +#include +#include + +#define LPTIM1_OUT "lptim1_out" +#define LPTIM2_OUT "lptim2_out" +#define LPTIM3_OUT "lptim3_out" + +#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER) +bool is_stm32_lptim_trigger(struct iio_trigger *trig); +#else +static inline bool is_stm32_lptim_trigger(struct iio_trigger *trig) +{ + return false; +} +#endif +#endif -- cgit From 2335ba704f32b855651d0cd15dd9b271ec565fb6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Sep 2017 23:55:59 +0200 Subject: netlink: add NLM_F_NONREC flag for deletion requests In the last NFWS in Faro, Portugal, we discussed that netlink is lacking the semantics to request non recursive deletions, ie. do not delete an object iff it has child objects that hang from this parent object that the user requests to be deleted. We need this new flag to solve a problem for the iptables-compat backward compatibility utility, that runs iptables commands using the existing nf_tables netlink interface. Specifically, custom chains in iptables cannot be deleted if there are rules in it, however, nf_tables allows to remove any chain that is populated with content. To sort out this asymmetry, iptables-compat userspace sets this new NLM_F_NONREC flag to obtain the same semantics that iptables provides. This new flag should only be used for deletion requests. Note this new flag value overlaps with the existing: * NLM_F_ROOT for get requests. * NLM_F_REPLACE for new requests. However, those flags should not ever be used in deletion requests. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index f4fc9c9e123d..e8af60a7c56d 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -69,6 +69,9 @@ struct nlmsghdr { #define NLM_F_CREATE 0x400 /* Create, if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ +/* Modifiers to DELETE request */ +#define NLM_F_NONREC 0x100 /* Do not delete recursively */ + /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ #define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */ -- cgit From 1985296a3ccd5d89d2ec2ca17b1fa1a225f8ecd1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Sep 2017 12:07:24 -0400 Subject: fix the __user misannotations in asm-generic get_user/put_user Signed-off-by: Al Viro --- include/asm-generic/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 723e81a6c162..2e51f6e7b3c2 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -75,10 +75,10 @@ static inline int __access_ok(unsigned long addr, unsigned long size) #define put_user(x, ptr) \ ({ \ - void *__p = (ptr); \ + void __user *__p = (ptr); \ might_fault(); \ access_ok(VERIFY_WRITE, __p, sizeof(*ptr)) ? \ - __put_user((x), ((__typeof__(*(ptr)) *)__p)) : \ + __put_user((x), ((__typeof__(*(ptr)) __user *)__p)) : \ -EFAULT; \ }) @@ -137,10 +137,10 @@ extern int __put_user_bad(void) __attribute__((noreturn)); #define get_user(x, ptr) \ ({ \ - const void *__p = (ptr); \ + const void __user *__p = (ptr); \ might_fault(); \ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ - __get_user((x), (__typeof__(*(ptr)) *)__p) : \ + __get_user((x), (__typeof__(*(ptr)) __user *)__p) :\ ((x) = (__typeof__(*(ptr)))0,-EFAULT); \ }) -- cgit From 126534141b45d9d1b205fbe3f2321200074b76fd Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 20 Aug 2017 00:18:19 +0200 Subject: MIPS: lantiq: Add a GPHY driver which uses the RCU syscon-mfd Compared to the old xrx200_phy_fw driver the new version has multiple enhancements. The name of the firmware files does not have to be added to all .dts files anymore - one now configures the GPHY mode (FE or GE) instead. Each GPHY can now also boot separate firmware (thus mixing of GE and FE GPHYs is now possible). The new implementation is based on the RCU syscon-mfd and uses the reeset_controller framework instead of raw RCU register reads/writes. Signed-off-by: Martin Blumenstingl Signed-off-by: Hauke Mehrtens Reviewed-by: Andy Shevchenko Acked-by: Rob Herring Cc: john@phrozen.org Cc: p.zabel@pengutronix.de Cc: kishon@ti.com Cc: mark.rutland@arm.com Cc: linux-mips@linux-mips.org Cc: linux-mtd@lists.infradead.org Cc: linux-watchdog@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: linux-spi@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17128/ Signed-off-by: Ralf Baechle --- include/dt-bindings/mips/lantiq_rcu_gphy.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/dt-bindings/mips/lantiq_rcu_gphy.h (limited to 'include') diff --git a/include/dt-bindings/mips/lantiq_rcu_gphy.h b/include/dt-bindings/mips/lantiq_rcu_gphy.h new file mode 100644 index 000000000000..fa1a63773342 --- /dev/null +++ b/include/dt-bindings/mips/lantiq_rcu_gphy.h @@ -0,0 +1,15 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * Copyright (C) 2016 Martin Blumenstingl + * Copyright (C) 2017 Hauke Mehrtens + */ +#ifndef _DT_BINDINGS_MIPS_LANTIQ_RCU_GPHY_H +#define _DT_BINDINGS_MIPS_LANTIQ_RCU_GPHY_H + +#define GPHY_MODE_GE 1 +#define GPHY_MODE_FE 2 + +#endif /* _DT_BINDINGS_MIPS_LANTIQ_RCU_GPHY_H */ -- cgit From 495e642939114478a5237a7d91661ba93b76f15a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 4 Sep 2017 21:42:22 +0200 Subject: vfs: add flags to d_real() Add a separate flags argument (in addition to the open flags) to control the behavior of d_real(). Signed-off-by: Miklos Szeredi --- include/linux/dcache.h | 11 ++++++----- include/linux/fs.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index aae1cdb76851..fd0721e520f4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -147,7 +147,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *, - unsigned int); + unsigned int, unsigned int); } ____cacheline_aligned; /* @@ -566,7 +566,8 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * d_real - Return the real dentry * @dentry: the dentry to query * @inode: inode to select the dentry from multiple layers (can be NULL) - * @flags: open flags to control copy-up behavior + * @open_flags: open flags to control copy-up behavior + * @flags: flags to control what is returned by this function * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. @@ -575,10 +576,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) */ static inline struct dentry *d_real(struct dentry *dentry, const struct inode *inode, - unsigned int flags) + unsigned int open_flags, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode, flags); + return dentry->d_op->d_real(dentry, inode, open_flags, flags); else return dentry; } @@ -593,7 +594,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); + return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0)); } struct name_snapshot { diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..ee1db83c39cb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1233,7 +1233,7 @@ static inline struct inode *file_inode(const struct file *f) static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file), 0); + return d_real(file->f_path.dentry, file_inode(file), 0, 0); } static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) -- cgit From 91f9943e1c7b6638f27312d03fe71fcc67b23571 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Aug 2017 16:13:20 +0200 Subject: fs: support RWF_NOWAIT for buffered reads This is based on the old idea and code from Milosz Tanski. With the aio nowait code it becomes mostly trivial now. Buffered writes continue to return -EOPNOTSUPP if RWF_NOWAIT is passed. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cbfe127bccf8..94582c379dac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -146,8 +146,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) -/* File is capable of returning -EAGAIN if AIO will block */ -#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000) +/* File is capable of returning -EAGAIN if I/O will block */ +#define FMODE_NOWAIT ((__force fmode_t)0x8000000) /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector @@ -3149,7 +3149,7 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { - if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT)) + if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; ki->ki_flags |= IOCB_NOWAIT; } -- cgit From bdd1d2d3d251c65b74ac4493e08db18971c09240 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Sep 2017 17:39:13 +0200 Subject: fs: fix kernel_read prototype Use proper ssize_t and size_t types for the return value and count argument, move the offset last and make it an in/out argument like all other read/write helpers, and make the buf argument a void pointer to get rid of lots of casts in the callers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cbfe127bccf8..2ba8f38a4d63 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2772,13 +2772,13 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) return kernel_read_file_str[id]; } -extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_path(char *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); +extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit From e13ec939e96b13e664bb6cee361cc976a0ee621a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Sep 2017 17:39:14 +0200 Subject: fs: fix kernel_write prototype Make the position an in/out argument like all the other read/write helpers and and make the buf argument a void pointer. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba8f38a4d63..0e7d3da8a307 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2779,7 +2779,7 @@ extern int kernel_read_file_from_path(char *, void **, loff_t *, loff_t, extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); -extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t); +extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit From 73e18f7c0b3e1432353cdd86672c27cace7e6a7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Sep 2017 17:39:15 +0200 Subject: fs: make the buf argument to __kernel_write a void pointer This matches kernel_read and kernel_write and avoids any need for casts in the callers. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0e7d3da8a307..9ab7e2bf7dd1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,7 +2780,7 @@ extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); -extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); +extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ -- cgit From eb031849d52e61d24ba54e9d27553189ff328174 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Sep 2017 17:39:23 +0200 Subject: fs: unexport __vfs_read/__vfs_write No modular users left, and any new ones should use kernel_read/write or iov_iter variants instead. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ab7e2bf7dd1..f24b821cee8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1754,7 +1754,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec **ret_pointer); extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); -extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit From 9725d4cef62229b4ec4c912e0db0761e7d400650 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Sep 2017 17:39:25 +0200 Subject: fs: unexport vfs_readv and vfs_writev We've got no modular users left, and any potential modular user is better of with iov_iter based variants. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index f24b821cee8d..355cf02830a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1758,8 +1758,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, int); -extern ssize_t vfs_writev(struct file *, const struct iovec __user *, - unsigned long, loff_t *, int); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, -- cgit From ec58871fb9c50429d6b5570066a7166da8faf086 Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Thu, 20 Jul 2017 15:32:42 +0800 Subject: mfd: hi6421-pmic: Add support for HiSilicon Hi6421v530 Add support for HiSilicon Hi6421v530 PMIC. Hi6421v530 communicates with main SoC via memory-mapped I/O. Hi6421v530 and Hi6421 are PMIC chips from the same vendor, HiSilicon, but at different revisions. They share the same memory-mapped I/O design. They differ in integrated devices, such as regulator details, LDO voltage points. Signed-off-by: Guodong Xu Signed-off-by: Wang Xiaoyin Acked-by: Arnd Bergmann Signed-off-by: Lee Jones --- include/linux/mfd/hi6421-pmic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/hi6421-pmic.h b/include/linux/mfd/hi6421-pmic.h index 587273e35acf..2580c08db7b1 100644 --- a/include/linux/mfd/hi6421-pmic.h +++ b/include/linux/mfd/hi6421-pmic.h @@ -38,4 +38,9 @@ struct hi6421_pmic { struct regmap *regmap; }; +enum hi6421_type { + HI6421 = 0, + HI6421_V530, +}; + #endif /* __HI6421_PMIC_H */ -- cgit From d3ea212720948acff862b4c842d5b464ad338841 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 17 Jul 2017 22:45:12 +0200 Subject: mfd: Add ROHM BD9571MWV-M MFD PMIC driver Add the MFD part of the ROHM BD9571MWV-M PMIC driver and MAINTAINERS entry. The MFD part only specifies the regmap bits for the PMIC and binds the subdevs together. Signed-off-by: Marek Vasut Signed-off-by: Lee Jones --- include/linux/mfd/bd9571mwv.h | 115 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 include/linux/mfd/bd9571mwv.h (limited to 'include') diff --git a/include/linux/mfd/bd9571mwv.h b/include/linux/mfd/bd9571mwv.h new file mode 100644 index 000000000000..f0708ba4cbba --- /dev/null +++ b/include/linux/mfd/bd9571mwv.h @@ -0,0 +1,115 @@ +/* + * ROHM BD9571MWV-M driver + * + * Copyright (C) 2017 Marek Vasut + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + * + * Based on the TPS65086 driver + */ + +#ifndef __LINUX_MFD_BD9571MWV_H +#define __LINUX_MFD_BD9571MWV_H + +#include +#include + +/* List of registers for BD9571MWV */ +#define BD9571MWV_VENDOR_CODE 0x00 +#define BD9571MWV_VENDOR_CODE_VAL 0xdb +#define BD9571MWV_PRODUCT_CODE 0x01 +#define BD9571MWV_PRODUCT_CODE_VAL 0x60 +#define BD9571MWV_PRODUCT_REVISION 0x02 + +#define BD9571MWV_I2C_FUSA_MODE 0x10 +#define BD9571MWV_I2C_MD2_E1_BIT_1 0x11 +#define BD9571MWV_I2C_MD2_E1_BIT_2 0x12 + +#define BD9571MWV_BKUP_MODE_CNT 0x20 +#define BD9571MWV_BKUP_MODE_STATUS 0x21 +#define BD9571MWV_BKUP_RECOVERY_CNT 0x22 +#define BD9571MWV_BKUP_CTRL_TIM_CNT 0x23 +#define BD9571MWV_WAITBKUP_WDT_CNT 0x24 +#define BD9571MWV_128H_TIM_CNT 0x26 +#define BD9571MWV_QLLM_CNT 0x27 + +#define BD9571MWV_AVS_SET_MONI 0x31 +#define BD9571MWV_AVS_SET_MONI_MASK 0x3 +#define BD9571MWV_AVS_VD09_VID(n) (0x32 + (n)) +#define BD9571MWV_AVS_DVFS_VID(n) (0x36 + (n)) + +#define BD9571MWV_VD18_VID 0x42 +#define BD9571MWV_VD25_VID 0x43 +#define BD9571MWV_VD33_VID 0x44 + +#define BD9571MWV_DVFS_VINIT 0x50 +#define BD9571MWV_DVFS_SETVMAX 0x52 +#define BD9571MWV_DVFS_BOOSTVID 0x53 +#define BD9571MWV_DVFS_SETVID 0x54 +#define BD9571MWV_DVFS_MONIVDAC 0x55 +#define BD9571MWV_DVFS_PGD_CNT 0x56 + +#define BD9571MWV_GPIO_DIR 0x60 +#define BD9571MWV_GPIO_OUT 0x61 +#define BD9571MWV_GPIO_IN 0x62 +#define BD9571MWV_GPIO_DEB 0x63 +#define BD9571MWV_GPIO_INT_SET 0x64 +#define BD9571MWV_GPIO_INT 0x65 +#define BD9571MWV_GPIO_INTMASK 0x66 + +#define BD9571MWV_REG_KEEP(n) (0x70 + (n)) + +#define BD9571MWV_PMIC_INTERNAL_STATUS 0x80 +#define BD9571MWV_PROT_ERROR_STATUS0 0x81 +#define BD9571MWV_PROT_ERROR_STATUS1 0x82 +#define BD9571MWV_PROT_ERROR_STATUS2 0x83 +#define BD9571MWV_PROT_ERROR_STATUS3 0x84 +#define BD9571MWV_PROT_ERROR_STATUS4 0x85 + +#define BD9571MWV_INT_INTREQ 0x90 +#define BD9571MWV_INT_INTREQ_MD1_INT BIT(0) +#define BD9571MWV_INT_INTREQ_MD2_E1_INT BIT(1) +#define BD9571MWV_INT_INTREQ_MD2_E2_INT BIT(2) +#define BD9571MWV_INT_INTREQ_PROT_ERR_INT BIT(3) +#define BD9571MWV_INT_INTREQ_GP_INT BIT(4) +#define BD9571MWV_INT_INTREQ_128H_OF_INT BIT(5) +#define BD9571MWV_INT_INTREQ_WDT_OF_INT BIT(6) +#define BD9571MWV_INT_INTREQ_BKUP_TRG_INT BIT(7) +#define BD9571MWV_INT_INTMASK 0x91 + +#define BD9571MWV_ACCESS_KEY 0xff + +/* Define the BD9571MWV IRQ numbers */ +enum bd9571mwv_irqs { + BD9571MWV_IRQ_MD1, + BD9571MWV_IRQ_MD2_E1, + BD9571MWV_IRQ_MD2_E2, + BD9571MWV_IRQ_PROT_ERR, + BD9571MWV_IRQ_GP, + BD9571MWV_IRQ_128H_OF, + BD9571MWV_IRQ_WDT_OF, + BD9571MWV_IRQ_BKUP_TRG, +}; + +/** + * struct bd9571mwv - state holder for the bd9571mwv driver + * + * Device data may be used to access the BD9571MWV chip + */ +struct bd9571mwv { + struct device *dev; + struct regmap *regmap; + + /* IRQ Data */ + int irq; + struct regmap_irq_chip_data *irq_data; +}; + +#endif /* __LINUX_MFD_BD9571MWV_H */ -- cgit From 7303733a6ca2a68b210ebdc09cace8b0ffe8b179 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 26 Jul 2017 16:28:26 +0800 Subject: mfd: axp20x: Add support for AXP813 PMIC The X-Powers AXP813 PMIC is normally used with Allwinner's A83T SoC. It has the same range of functions as other X-Powers PMICs, such as DC-DC buck converter and linear regulator outputs, AC-IN and VBUS power supplies, power button trigger, GPIOs, ADCs, and a battery charger. Note that the IRQ table given in the datasheet is incorrect: in IRQ enable/status registers 1, there are separate IRQs for ACIN and VBUS, instead of bits [7:5] being the same as bits [4:2]. So it shares the same IRQs as the AXP803, rather than the AXP288. This patch adds basic mfd support for it, with only the power button enabled. Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 965b027e31b3..e9c908c4fba8 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -23,6 +23,7 @@ enum axp20x_variants { AXP803_ID, AXP806_ID, AXP809_ID, + AXP813_ID, NR_AXP20X_VARIANTS, }; @@ -387,6 +388,34 @@ enum { AXP803_REG_ID_MAX, }; +enum { + AXP813_DCDC1 = 0, + AXP813_DCDC2, + AXP813_DCDC3, + AXP813_DCDC4, + AXP813_DCDC5, + AXP813_DCDC6, + AXP813_DCDC7, + AXP813_ALDO1, + AXP813_ALDO2, + AXP813_ALDO3, + AXP813_DLDO1, + AXP813_DLDO2, + AXP813_DLDO3, + AXP813_DLDO4, + AXP813_ELDO1, + AXP813_ELDO2, + AXP813_ELDO3, + AXP813_FLDO1, + AXP813_FLDO2, + AXP813_FLDO3, + AXP813_RTC_LDO, + AXP813_LDO_IO0, + AXP813_LDO_IO1, + AXP813_SW, + AXP813_REG_ID_MAX, +}; + /* IRQs */ enum { AXP152_IRQ_LDO0IN_CONNECT = 1, -- cgit From b0f3ab20e76499db12b0bbadb5737d9870f10418 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 18 Jul 2017 15:22:19 +0200 Subject: mfd: syscon: atmel-smc: Add helper to retrieve register layout For HSMC controller, the register layout depends on the device i.e. the offset of setup, pulse, cycle, mode and timings registers is not the same. An helper is added to provide the correct register layout. Fixes: fe9d7cb22ef3 ("mfd: syscon: atmel-smc: Add new helpers to ease SMC regs manipulation") Suggested-by: Boris Brezillon Signed-off-by: Ludovic Desroches Acked-by: Boris Brezillon Acked-by: Nicolas Ferre Acked-by: Alexandre Belloni Signed-off-by: Lee Jones --- include/linux/mfd/syscon/atmel-smc.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/syscon/atmel-smc.h b/include/linux/mfd/syscon/atmel-smc.h index afa266169800..7a367f34b66a 100644 --- a/include/linux/mfd/syscon/atmel-smc.h +++ b/include/linux/mfd/syscon/atmel-smc.h @@ -15,21 +15,26 @@ #define _LINUX_MFD_SYSCON_ATMEL_SMC_H_ #include +#include #include #define ATMEL_SMC_SETUP(cs) (((cs) * 0x10)) -#define ATMEL_HSMC_SETUP(cs) (0x600 + ((cs) * 0x14)) +#define ATMEL_HSMC_SETUP(layout, cs) \ + ((layout)->timing_regs_offset + ((cs) * 0x14)) #define ATMEL_SMC_PULSE(cs) (((cs) * 0x10) + 0x4) -#define ATMEL_HSMC_PULSE(cs) (0x600 + ((cs) * 0x14) + 0x4) +#define ATMEL_HSMC_PULSE(layout, cs) \ + ((layout)->timing_regs_offset + ((cs) * 0x14) + 0x4) #define ATMEL_SMC_CYCLE(cs) (((cs) * 0x10) + 0x8) -#define ATMEL_HSMC_CYCLE(cs) (0x600 + ((cs) * 0x14) + 0x8) +#define ATMEL_HSMC_CYCLE(layout, cs) \ + ((layout)->timing_regs_offset + ((cs) * 0x14) + 0x8) #define ATMEL_SMC_NWE_SHIFT 0 #define ATMEL_SMC_NCS_WR_SHIFT 8 #define ATMEL_SMC_NRD_SHIFT 16 #define ATMEL_SMC_NCS_RD_SHIFT 24 #define ATMEL_SMC_MODE(cs) (((cs) * 0x10) + 0xc) -#define ATMEL_HSMC_MODE(cs) (0x600 + ((cs) * 0x14) + 0x10) +#define ATMEL_HSMC_MODE(layout, cs) \ + ((layout)->timing_regs_offset + ((cs) * 0x14) + 0x10) #define ATMEL_SMC_MODE_READMODE_MASK BIT(0) #define ATMEL_SMC_MODE_READMODE_NCS (0 << 0) #define ATMEL_SMC_MODE_READMODE_NRD (1 << 0) @@ -59,7 +64,8 @@ #define ATMEL_SMC_MODE_PS_16 (2 << 28) #define ATMEL_SMC_MODE_PS_32 (3 << 28) -#define ATMEL_HSMC_TIMINGS(cs) (0x600 + ((cs) * 0x14) + 0xc) +#define ATMEL_HSMC_TIMINGS(layout, cs) \ + ((layout)->timing_regs_offset + ((cs) * 0x14) + 0xc) #define ATMEL_HSMC_TIMINGS_OCMS BIT(12) #define ATMEL_HSMC_TIMINGS_RBNSEL(x) ((x) << 28) #define ATMEL_HSMC_TIMINGS_NFSEL BIT(31) @@ -69,6 +75,10 @@ #define ATMEL_HSMC_TIMINGS_TRR_SHIFT 16 #define ATMEL_HSMC_TIMINGS_TWB_SHIFT 24 +struct atmel_hsmc_reg_layout { + unsigned int timing_regs_offset; +}; + /** * struct atmel_smc_cs_conf - SMC CS config as described in the datasheet. * @setup: NCS/NWE/NRD setup timings (not applicable to at91rm9200) @@ -98,11 +108,15 @@ int atmel_smc_cs_conf_set_cycle(struct atmel_smc_cs_conf *conf, unsigned int shift, unsigned int ncycles); void atmel_smc_cs_conf_apply(struct regmap *regmap, int cs, const struct atmel_smc_cs_conf *conf); -void atmel_hsmc_cs_conf_apply(struct regmap *regmap, int cs, - const struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_apply(struct regmap *regmap, + const struct atmel_hsmc_reg_layout *reglayout, + int cs, const struct atmel_smc_cs_conf *conf); void atmel_smc_cs_conf_get(struct regmap *regmap, int cs, struct atmel_smc_cs_conf *conf); -void atmel_hsmc_cs_conf_get(struct regmap *regmap, int cs, - struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_get(struct regmap *regmap, + const struct atmel_hsmc_reg_layout *reglayout, + int cs, struct atmel_smc_cs_conf *conf); +const struct atmel_hsmc_reg_layout * +atmel_hsmc_get_reg_layout(struct device_node *np); #endif /* _LINUX_MFD_SYSCON_ATMEL_SMC_H_ */ -- cgit From 9bbf6a15ce19dd947b7fa6ad4095931ab3682da8 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Fri, 28 Jul 2017 17:30:24 -0700 Subject: mfd: Add support for TPS68470 device The TPS68470 device is an advanced power management unit that powers a Compact Camera Module (CCM), generates clocks for image sensors, drives a dual LED for Flash and incorporates two LED drivers for general purpose indicators. This patch adds support for TPS68470 mfd device. Signed-off-by: Rajmohan Mani Signed-off-by: Lee Jones --- include/linux/mfd/tps68470.h | 97 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 include/linux/mfd/tps68470.h (limited to 'include') diff --git a/include/linux/mfd/tps68470.h b/include/linux/mfd/tps68470.h new file mode 100644 index 000000000000..44f9d9f647ed --- /dev/null +++ b/include/linux/mfd/tps68470.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 Intel Corporation + * + * Functions to access TPS68470 power management chip. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_MFD_TPS68470_H +#define __LINUX_MFD_TPS68470_H + +/* Register addresses */ +#define TPS68470_REG_POSTDIV2 0x06 +#define TPS68470_REG_BOOSTDIV 0x07 +#define TPS68470_REG_BUCKDIV 0x08 +#define TPS68470_REG_PLLSWR 0x09 +#define TPS68470_REG_XTALDIV 0x0A +#define TPS68470_REG_PLLDIV 0x0B +#define TPS68470_REG_POSTDIV 0x0C +#define TPS68470_REG_PLLCTL 0x0D +#define TPS68470_REG_PLLCTL2 0x0E +#define TPS68470_REG_CLKCFG1 0x0F +#define TPS68470_REG_CLKCFG2 0x10 +#define TPS68470_REG_GPCTL0A 0x14 +#define TPS68470_REG_GPCTL0B 0x15 +#define TPS68470_REG_GPCTL1A 0x16 +#define TPS68470_REG_GPCTL1B 0x17 +#define TPS68470_REG_GPCTL2A 0x18 +#define TPS68470_REG_GPCTL2B 0x19 +#define TPS68470_REG_GPCTL3A 0x1A +#define TPS68470_REG_GPCTL3B 0x1B +#define TPS68470_REG_GPCTL4A 0x1C +#define TPS68470_REG_GPCTL4B 0x1D +#define TPS68470_REG_GPCTL5A 0x1E +#define TPS68470_REG_GPCTL5B 0x1F +#define TPS68470_REG_GPCTL6A 0x20 +#define TPS68470_REG_GPCTL6B 0x21 +#define TPS68470_REG_SGPO 0x22 +#define TPS68470_REG_GPDI 0x26 +#define TPS68470_REG_GPDO 0x27 +#define TPS68470_REG_VCMVAL 0x3C +#define TPS68470_REG_VAUX1VAL 0x3D +#define TPS68470_REG_VAUX2VAL 0x3E +#define TPS68470_REG_VIOVAL 0x3F +#define TPS68470_REG_VSIOVAL 0x40 +#define TPS68470_REG_VAVAL 0x41 +#define TPS68470_REG_VDVAL 0x42 +#define TPS68470_REG_S_I2C_CTL 0x43 +#define TPS68470_REG_VCMCTL 0x44 +#define TPS68470_REG_VAUX1CTL 0x45 +#define TPS68470_REG_VAUX2CTL 0x46 +#define TPS68470_REG_VACTL 0x47 +#define TPS68470_REG_VDCTL 0x48 +#define TPS68470_REG_RESET 0x50 +#define TPS68470_REG_REVID 0xFF + +#define TPS68470_REG_MAX TPS68470_REG_REVID + +/* Register field definitions */ + +#define TPS68470_REG_RESET_MASK GENMASK(7, 0) +#define TPS68470_VAVAL_AVOLT_MASK GENMASK(6, 0) + +#define TPS68470_VDVAL_DVOLT_MASK GENMASK(5, 0) +#define TPS68470_VCMVAL_VCVOLT_MASK GENMASK(6, 0) +#define TPS68470_VIOVAL_IOVOLT_MASK GENMASK(6, 0) +#define TPS68470_VSIOVAL_IOVOLT_MASK GENMASK(6, 0) +#define TPS68470_VAUX1VAL_AUX1VOLT_MASK GENMASK(6, 0) +#define TPS68470_VAUX2VAL_AUX2VOLT_MASK GENMASK(6, 0) + +#define TPS68470_VACTL_EN_MASK GENMASK(0, 0) +#define TPS68470_VDCTL_EN_MASK GENMASK(0, 0) +#define TPS68470_VCMCTL_EN_MASK GENMASK(0, 0) +#define TPS68470_S_I2C_CTL_EN_MASK GENMASK(1, 0) +#define TPS68470_VAUX1CTL_EN_MASK GENMASK(0, 0) +#define TPS68470_VAUX2CTL_EN_MASK GENMASK(0, 0) +#define TPS68470_PLL_EN_MASK GENMASK(0, 0) + +#define TPS68470_CLKCFG1_MODE_A_MASK GENMASK(1, 0) +#define TPS68470_CLKCFG1_MODE_B_MASK GENMASK(3, 2) + +#define TPS68470_GPIO_CTL_REG_A(x) (TPS68470_REG_GPCTL0A + (x) * 2) +#define TPS68470_GPIO_CTL_REG_B(x) (TPS68470_REG_GPCTL0B + (x) * 2) +#define TPS68470_GPIO_MODE_MASK GENMASK(1, 0) +#define TPS68470_GPIO_MODE_IN 0 +#define TPS68470_GPIO_MODE_IN_PULLUP 1 +#define TPS68470_GPIO_MODE_OUT_CMOS 2 +#define TPS68470_GPIO_MODE_OUT_ODRAIN 3 + +#endif /* __LINUX_MFD_TPS68470_H */ -- cgit From 53168215909281a09d3afc6fb51a9d4f81f74d39 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jun 2017 12:20:30 +0200 Subject: mac80211: fix VLAN handling with TXQs With TXQs, the AP_VLAN interfaces are resolved to their owner AP interface when enqueuing the frame, which makes sense since the frame really goes out on that as far as the driver is concerned. However, this introduces a problem: frames to be encrypted with a VLAN-specific GTK will now be encrypted with the AP GTK, since the information about which virtual interface to use to select the key is taken from the TXQ. Fix this by preserving info->control.vif and using that in the dequeue function. This now requires doing the driver-mapping in the dequeue as well. Since there's no way to filter the frames that are sitting on a TXQ, drop all frames, which may affect other interfaces, when an AP_VLAN is removed. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f8149ca192b4..885690fa39c8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -919,21 +919,10 @@ struct ieee80211_tx_info { unsigned long jiffies; }; /* NB: vif can be NULL for injected frames */ - union { - /* NB: vif can be NULL for injected frames */ - struct ieee80211_vif *vif; - - /* When packets are enqueued on txq it's easy - * to re-construct the vif pointer. There's no - * more space in tx_info so it can be used to - * store the necessary enqueue time for packet - * sojourn time computation. - */ - codel_time_t enqueue_time; - }; + struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; u32 flags; - /* 4 bytes free */ + codel_time_t enqueue_time; } control; struct { u64 cookie; -- cgit From c93022a72f01f8e53d6e1bc2a8d2c2824c2f36bc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 05:43:39 -0400 Subject: media: ca.h: split typedefs from structs Using typedefs inside the Kernel is against CodingStyle, and there's no good usage here. Just like we did at frontend.h, at commit 0df289a209e0 ("[media] dvb: Get rid of typedev usage for enums"), let's keep those typedefs only to provide userspace backward compatibility. No functional changes. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 51 ++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index c18537f3e449..00cf24587bea 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -26,7 +26,7 @@ /* slot interface types and info */ -typedef struct ca_slot_info { +struct ca_slot_info { int num; /* slot number */ int type; /* CA interface this slot supports */ @@ -39,52 +39,65 @@ typedef struct ca_slot_info { unsigned int flags; #define CA_CI_MODULE_PRESENT 1 /* module (or card) inserted */ #define CA_CI_MODULE_READY 2 -} ca_slot_info_t; +}; /* descrambler types and info */ -typedef struct ca_descr_info { +struct ca_descr_info { unsigned int num; /* number of available descramblers (keys) */ unsigned int type; /* type of supported scrambling system */ #define CA_ECD 1 #define CA_NDS 2 #define CA_DSS 4 -} ca_descr_info_t; +}; -typedef struct ca_caps { +struct ca_caps { unsigned int slot_num; /* total number of CA card and module slots */ unsigned int slot_type; /* OR of all supported types */ unsigned int descr_num; /* total number of descrambler slots (keys) */ unsigned int descr_type; /* OR of all supported types */ -} ca_caps_t; +}; /* a message to/from a CI-CAM */ -typedef struct ca_msg { +struct ca_msg { unsigned int index; unsigned int type; unsigned int length; unsigned char msg[256]; -} ca_msg_t; +}; -typedef struct ca_descr { +struct ca_descr { unsigned int index; unsigned int parity; /* 0 == even, 1 == odd */ unsigned char cw[8]; -} ca_descr_t; +}; -typedef struct ca_pid { +struct ca_pid { unsigned int pid; int index; /* -1 == disable*/ -} ca_pid_t; +}; #define CA_RESET _IO('o', 128) -#define CA_GET_CAP _IOR('o', 129, ca_caps_t) -#define CA_GET_SLOT_INFO _IOR('o', 130, ca_slot_info_t) -#define CA_GET_DESCR_INFO _IOR('o', 131, ca_descr_info_t) -#define CA_GET_MSG _IOR('o', 132, ca_msg_t) -#define CA_SEND_MSG _IOW('o', 133, ca_msg_t) -#define CA_SET_DESCR _IOW('o', 134, ca_descr_t) -#define CA_SET_PID _IOW('o', 135, ca_pid_t) +#define CA_GET_CAP _IOR('o', 129, struct ca_caps) +#define CA_GET_SLOT_INFO _IOR('o', 130, struct ca_slot_info) +#define CA_GET_DESCR_INFO _IOR('o', 131, struct ca_descr_info) +#define CA_GET_MSG _IOR('o', 132, struct ca_msg) +#define CA_SEND_MSG _IOW('o', 133, struct ca_msg) +#define CA_SET_DESCR _IOW('o', 134, struct ca_descr) +#define CA_SET_PID _IOW('o', 135, struct ca_pid) + +#if !defined (__KERNEL__) + +/* This is needed for legacy userspace support */ +typedef struct ca_slot_info ca_slot_info_t; +typedef struct ca_descr_info ca_descr_info_t; +typedef struct ca_caps ca_caps_t; +typedef struct ca_msg ca_msg_t; +typedef struct ca_descr ca_descr_t; +typedef struct ca_pid ca_pid_t; + +#endif + #endif -- cgit From 3256b36ea36525945d8575c0100752819a309aaa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 06:09:14 -0400 Subject: media: dmx.h: split typedefs from structs Using typedefs inside the Kernel is against CodingStyle, and there's no good usage here. Just like we did at frontend.h, at commit 0df289a209e0 ("[media] dvb: Get rid of typedev usage for enums"), let's keep those typedefs only to provide userspace backward compatibility. No functional changes. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 56 ++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 427e4899ed69..1bc4d6fb0f01 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -43,16 +43,14 @@ enum dmx_output DMX_OUT_TSDEMUX_TAP /* Like TS_TAP but retrieved from the DMX device */ }; -typedef enum dmx_output dmx_output_t; - -typedef enum dmx_input +enum dmx_input { DMX_IN_FRONTEND, /* Input from a front-end device. */ DMX_IN_DVR /* Input from the logical DVR device. */ -} dmx_input_t; +}; -typedef enum dmx_ts_pes +enum dmx_ts_pes { DMX_PES_AUDIO0, DMX_PES_VIDEO0, @@ -79,7 +77,7 @@ typedef enum dmx_ts_pes DMX_PES_PCR3, DMX_PES_OTHER -} dmx_pes_type_t; +}; #define DMX_PES_AUDIO DMX_PES_AUDIO0 #define DMX_PES_VIDEO DMX_PES_VIDEO0 @@ -88,20 +86,20 @@ typedef enum dmx_ts_pes #define DMX_PES_PCR DMX_PES_PCR0 -typedef struct dmx_filter +struct dmx_filter { __u8 filter[DMX_FILTER_SIZE]; __u8 mask[DMX_FILTER_SIZE]; __u8 mode[DMX_FILTER_SIZE]; -} dmx_filter_t; +}; struct dmx_sct_filter_params { - __u16 pid; - dmx_filter_t filter; - __u32 timeout; - __u32 flags; + __u16 pid; + struct dmx_filter filter; + __u32 timeout; + __u32 flags; #define DMX_CHECK_CRC 1 #define DMX_ONESHOT 2 #define DMX_IMMEDIATE_START 4 @@ -111,19 +109,19 @@ struct dmx_sct_filter_params struct dmx_pes_filter_params { - __u16 pid; - dmx_input_t input; - dmx_output_t output; - dmx_pes_type_t pes_type; - __u32 flags; + __u16 pid; + enum dmx_input input; + enum dmx_output output; + enum dmx_ts_pes pes_type; + __u32 flags; }; -typedef struct dmx_caps { +struct dmx_caps { __u32 caps; int num_decoders; -} dmx_caps_t; +}; -typedef enum dmx_source { +enum dmx_source { DMX_SOURCE_FRONT0 = 0, DMX_SOURCE_FRONT1, DMX_SOURCE_FRONT2, @@ -132,7 +130,7 @@ typedef enum dmx_source { DMX_SOURCE_DVR1, DMX_SOURCE_DVR2, DMX_SOURCE_DVR3 -} dmx_source_t; +}; struct dmx_stc { unsigned int num; /* input : which STC? 0..N */ @@ -146,10 +144,22 @@ struct dmx_stc { #define DMX_SET_PES_FILTER _IOW('o', 44, struct dmx_pes_filter_params) #define DMX_SET_BUFFER_SIZE _IO('o', 45) #define DMX_GET_PES_PIDS _IOR('o', 47, __u16[5]) -#define DMX_GET_CAPS _IOR('o', 48, dmx_caps_t) -#define DMX_SET_SOURCE _IOW('o', 49, dmx_source_t) +#define DMX_GET_CAPS _IOR('o', 48, struct dmx_caps) +#define DMX_SET_SOURCE _IOW('o', 49, enum dmx_source) #define DMX_GET_STC _IOWR('o', 50, struct dmx_stc) #define DMX_ADD_PID _IOW('o', 51, __u16) #define DMX_REMOVE_PID _IOW('o', 52, __u16) +#if !defined (__KERNEL__) + +/* This is needed for legacy userspace support */ +typedef enum dmx_output dmx_output_t; +typedef enum dmx_input dmx_input_t; +typedef enum dmx_ts_pes dmx_pes_type_t; +typedef struct dmx_filter dmx_filter_t; +typedef struct dmx_caps dmx_caps_t; +typedef enum dmx_source dmx_source_t; + +#endif + #endif /* _UAPI_DVBDMX_H_ */ -- cgit From f35afa4f60c868d7c7811ba747133acbf39410ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 30 Aug 2017 07:55:47 -0400 Subject: media: dvb/frontend.h: move out a private internal structure struct dtv_cmds_h is just an ancillary struct used by the dvb_frontend.c to internally store frontend commands. It doesn't belong to the userspace header, nor it is used anywhere, except inside the DVB core. So, remove it from the header. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index afc3972b0879..3a80f3d1da1c 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -384,17 +384,6 @@ enum atscmh_rs_code_mode { #define NO_STREAM_ID_FILTER (~0U) #define LNA_AUTO (~0U) -struct dtv_cmds_h { - char *name; /* A display name for debugging purposes */ - - __u32 cmd; /* A unique ID */ - - /* Flags */ - __u32 set:1; /* Either a set or get property */ - __u32 buffer:1; /* Does this property use the buffer? */ - __u32 reserved:30; /* Align */ -}; - /** * Scale types for the quality parameters. * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That -- cgit From 8220ead805b6bab4ade2839857a198e9708b07de Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 30 Aug 2017 08:12:38 -0400 Subject: media: dvb/frontend.h: document the uAPI file Most of the stuff at the Digital TV frontend header file are documented only at the Documentation. However, a few kernel-doc markups are there, several of them with parsing issues. Add the missing documentation, copying definitions from the Documentation when it applies, fixing some bugs. Please notice that DVBv3 stuff that were deprecated weren't commented by purpose. Instead, they were clearly tagged as such. This patch prepares to move part of the documentation from Documentation/ to kernel-doc comments. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 580 ++++++++++++++++++++++++++++++++------ 1 file changed, 498 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 3a80f3d1da1c..16a318fc469a 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -28,13 +28,46 @@ #include -enum fe_type { - FE_QPSK, - FE_QAM, - FE_OFDM, - FE_ATSC -}; - +/** + * enum fe_caps - Frontend capabilities + * + * @FE_IS_STUPID: There's something wrong at the + * frontend, and it can't report its + * capabilities. + * @FE_CAN_INVERSION_AUTO: Can auto-detect frequency spectral + * band inversion + * @FE_CAN_FEC_1_2: Supports FEC 1/2 + * @FE_CAN_FEC_2_3: Supports FEC 2/3 + * @FE_CAN_FEC_3_4: Supports FEC 3/4 + * @FE_CAN_FEC_4_5: Supports FEC 4/5 + * @FE_CAN_FEC_5_6: Supports FEC 5/6 + * @FE_CAN_FEC_6_7: Supports FEC 6/7 + * @FE_CAN_FEC_7_8: Supports FEC 7/8 + * @FE_CAN_FEC_8_9: Supports FEC 8/9 + * @FE_CAN_FEC_AUTO: Can auto-detect FEC + * @FE_CAN_QPSK: Supports QPSK modulation + * @FE_CAN_QAM_16: Supports 16-QAM modulation + * @FE_CAN_QAM_32: Supports 32-QAM modulation + * @FE_CAN_QAM_64: Supports 64-QAM modulation + * @FE_CAN_QAM_128: Supports 128-QAM modulation + * @FE_CAN_QAM_256: Supports 256-QAM modulation + * @FE_CAN_QAM_AUTO: Can auto-detect QAM modulation + * @FE_CAN_TRANSMISSION_MODE_AUTO: Can auto-detect transmission mode + * @FE_CAN_BANDWIDTH_AUTO: Can auto-detect bandwidth + * @FE_CAN_GUARD_INTERVAL_AUTO: Can auto-detect guard interval + * @FE_CAN_HIERARCHY_AUTO: Can auto-detect hierarchy + * @FE_CAN_8VSB: Supports 8-VSB modulation + * @FE_CAN_16VSB: Supporta 16-VSB modulation + * @FE_HAS_EXTENDED_CAPS: Unused + * @FE_CAN_MULTISTREAM: Supports multistream filtering + * @FE_CAN_TURBO_FEC: Supports "turbo FEC" modulation + * @FE_CAN_2G_MODULATION: Supports "2nd generation" modulation, + * e. g. DVB-S2, DVB-T2, DVB-C2 + * @FE_NEEDS_BENDING: Unused + * @FE_CAN_RECOVER: Can recover from a cable unplug + * automatically + * @FE_CAN_MUTE_TS: Can stop spurious TS data output + */ enum fe_caps { FE_IS_STUPID = 0, FE_CAN_INVERSION_AUTO = 0x1, @@ -60,15 +93,55 @@ enum fe_caps { FE_CAN_HIERARCHY_AUTO = 0x100000, FE_CAN_8VSB = 0x200000, FE_CAN_16VSB = 0x400000, - FE_HAS_EXTENDED_CAPS = 0x800000, /* We need more bitspace for newer APIs, indicate this. */ - FE_CAN_MULTISTREAM = 0x4000000, /* frontend supports multistream filtering */ - FE_CAN_TURBO_FEC = 0x8000000, /* frontend supports "turbo fec modulation" */ - FE_CAN_2G_MODULATION = 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */ - FE_NEEDS_BENDING = 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */ - FE_CAN_RECOVER = 0x40000000, /* frontend can recover from a cable unplug automatically */ - FE_CAN_MUTE_TS = 0x80000000 /* frontend can stop spurious TS data output */ + FE_HAS_EXTENDED_CAPS = 0x800000, + FE_CAN_MULTISTREAM = 0x4000000, + FE_CAN_TURBO_FEC = 0x8000000, + FE_CAN_2G_MODULATION = 0x10000000, + FE_NEEDS_BENDING = 0x20000000, + FE_CAN_RECOVER = 0x40000000, + FE_CAN_MUTE_TS = 0x80000000 +}; + +/* + * DEPRECATED: Should be kept just due to backward compatibility. + */ +enum fe_type { + FE_QPSK, + FE_QAM, + FE_OFDM, + FE_ATSC }; +/** + * struct dvb_frontend_info - Frontend properties and capabilities + * + * @name: Name of the frontend + * @type: **DEPRECATED**. + * Should not be used on modern programs, + * as a frontend may have more than one type. + * In order to get the support types of a given + * frontend, use :c:type:`DTV_ENUM_DELSYS` + * instead. + * @frequency_min: Minimal frequency supported by the frontend. + * @frequency_max: Minimal frequency supported by the frontend. + * @frequency_stepsize: All frequencies are multiple of this value. + * @frequency_tolerance: Frequency tolerance. + * @symbol_rate_min: Minimal symbol rate, in bauds + * (for Cable/Satellite systems). + * @symbol_rate_max: Maximal symbol rate, in bauds + * (for Cable/Satellite systems). + * @symbol_rate_tolerance: Maximal symbol rate tolerance, in ppm + * (for Cable/Satellite systems). + * @notifier_delay: **DEPRECATED**. Not used by any driver. + * @caps: Capabilities supported by the frontend, + * as specified in &enum fe_caps. + * + * .. note: + * + * #. The frequencies are specified in Hz for Terrestrial and Cable + * systems. + * #. The frequencies are specified in kHz for Satellite systems. + */ struct dvb_frontend_info { char name[128]; enum fe_type type; /* DEPRECATED. Use DTV_ENUM_DELSYS instead */ @@ -78,53 +151,102 @@ struct dvb_frontend_info { __u32 frequency_tolerance; __u32 symbol_rate_min; __u32 symbol_rate_max; - __u32 symbol_rate_tolerance; /* ppm */ + __u32 symbol_rate_tolerance; __u32 notifier_delay; /* DEPRECATED */ enum fe_caps caps; }; - /** - * Check out the DiSEqC bus spec available on http://www.eutelsat.org/ for - * the meaning of this struct... + * struct dvb_diseqc_master_cmd - DiSEqC master command + * + * @msg: + * DiSEqC message to be sent. It contains a 3 bytes header with: + * framing + address + command, and an optional argument + * of up to 3 bytes of data. + * @msg_len: + * Length of the DiSEqC message. Valid values are 3 to 6. + * + * Check out the DiSEqC bus spec available on http://www.eutelsat.org/ for + * the possible messages that can be used. */ struct dvb_diseqc_master_cmd { - __u8 msg [6]; /* { framing, address, command, data [3] } */ - __u8 msg_len; /* valid values are 3...6 */ + __u8 msg[6]; + __u8 msg_len; }; +/** + * struct dvb_diseqc_slave_reply - DiSEqC received data + * + * @msg: + * DiSEqC message buffer to store a message received via DiSEqC. + * It contains one byte header with: framing and + * an optional argument of up to 3 bytes of data. + * @msg_len: + * Length of the DiSEqC message. Valid values are 0 to 4, + * where 0 means no message. + * @timeout: + * Return from ioctl after timeout ms with errorcode when + * no message was received. + * + * Check out the DiSEqC bus spec available on http://www.eutelsat.org/ for + * the possible messages that can be used. + */ struct dvb_diseqc_slave_reply { - __u8 msg [4]; /* { framing, data [3] } */ - __u8 msg_len; /* valid values are 0...4, 0 means no msg */ - int timeout; /* return from ioctl after timeout ms with */ -}; /* errorcode when no message was received */ + __u8 msg[4]; + __u8 msg_len; + int timeout; +}; +/** + * enum fe_sec_voltage - DC Voltage used to feed the LNBf + * + * @SEC_VOLTAGE_13: Output 13V to the LNBf + * @SEC_VOLTAGE_18: Output 18V to the LNBf + * @SEC_VOLTAGE_OFF: Don't feed the LNBf with a DC voltage + */ enum fe_sec_voltage { SEC_VOLTAGE_13, SEC_VOLTAGE_18, SEC_VOLTAGE_OFF }; +/** + * enum fe_sec_tone_mode - Type of tone to be send to the LNBf. + * @SEC_TONE_ON: Sends a 22kHz tone burst to the antenna. + * @SEC_TONE_OFF: Don't send a 22kHz tone to the antenna (except + * if the ``FE_DISEQC_*`` ioctls are called). + */ enum fe_sec_tone_mode { SEC_TONE_ON, SEC_TONE_OFF }; +/** + * enum fe_sec_mini_cmd - Type of mini burst to be sent + * + * @SEC_MINI_A: Sends a mini-DiSEqC 22kHz '0' Tone Burst to select + * satellite-A + * @SEC_MINI_B: Sends a mini-DiSEqC 22kHz '1' Data Burst to select + * satellite-B + */ enum fe_sec_mini_cmd { SEC_MINI_A, SEC_MINI_B }; /** - * enum fe_status - enumerates the possible frontend status - * @FE_HAS_SIGNAL: found something above the noise level - * @FE_HAS_CARRIER: found a DVB signal - * @FE_HAS_VITERBI: FEC is stable - * @FE_HAS_SYNC: found sync bytes - * @FE_HAS_LOCK: everything's working - * @FE_TIMEDOUT: no lock within the last ~2 seconds - * @FE_REINIT: frontend was reinitialized, application is recommended - * to reset DiSEqC, tone and parameters + * enum fe_status - Enumerates the possible frontend status. + * @FE_NONE: The frontend doesn't have any kind of lock. + * That's the initial frontend status + * @FE_HAS_SIGNAL: Has found something above the noise level. + * @FE_HAS_CARRIER: Has found a DVB signal. + * @FE_HAS_VITERBI: FEC inner coding (Viterbi, LDPC or other inner code). + * is stable. + * @FE_HAS_SYNC: Synchronization bytes was found. + * @FE_HAS_LOCK: DVB were locked and everything is working. + * @FE_TIMEDOUT: Fo lock within the last about 2 seconds. + * @FE_REINIT: Frontend was reinitialized, application is recommended + * to reset DiSEqC, tone and parameters. */ enum fe_status { FE_NONE = 0x00, @@ -137,12 +259,45 @@ enum fe_status { FE_REINIT = 0x40, }; +/** + * enum fe_spectral_inversion - Type of inversion band + * + * @INVERSION_OFF: Don't do spectral band inversion. + * @INVERSION_ON: Do spectral band inversion. + * @INVERSION_AUTO: Autodetect spectral band inversion. + * + * This parameter indicates if spectral inversion should be presumed or + * not. In the automatic setting (``INVERSION_AUTO``) the hardware will try + * to figure out the correct setting by itself. If the hardware doesn't + * support, the DVB core will try to lock at the carrier first with + * inversion off. If it fails, it will try to enable inversion. + */ enum fe_spectral_inversion { INVERSION_OFF, INVERSION_ON, INVERSION_AUTO }; +/** + * enum fe_code_rate - Type of Forward Error Correction (FEC) + * + * + * @FEC_NONE: No Forward Error Correction Code + * @FEC_1_2: Forward Error Correction Code 1/2 + * @FEC_2_3: Forward Error Correction Code 2/3 + * @FEC_3_4: Forward Error Correction Code 3/4 + * @FEC_4_5: Forward Error Correction Code 4/5 + * @FEC_5_6: Forward Error Correction Code 5/6 + * @FEC_6_7: Forward Error Correction Code 6/7 + * @FEC_7_8: Forward Error Correction Code 7/8 + * @FEC_8_9: Forward Error Correction Code 8/9 + * @FEC_AUTO: Autodetect Error Correction Code + * @FEC_3_5: Forward Error Correction Code 3/5 + * @FEC_9_10: Forward Error Correction Code 9/10 + * @FEC_2_5: Forward Error Correction Code 2/5 + * + * Please note that not all FEC types are supported by a given standard. + */ enum fe_code_rate { FEC_NONE = 0, FEC_1_2, @@ -159,6 +314,26 @@ enum fe_code_rate { FEC_2_5, }; +/** + * enum fe_modulation - Type of modulation/constellation + * @QPSK: QPSK modulation + * @QAM_16: 16-QAM modulation + * @QAM_32: 32-QAM modulation + * @QAM_64: 64-QAM modulation + * @QAM_128: 128-QAM modulation + * @QAM_256: 256-QAM modulation + * @QAM_AUTO: Autodetect QAM modulation + * @VSB_8: 8-VSB modulation + * @VSB_16: 16-VSB modulation + * @PSK_8: 8-PSK modulation + * @APSK_16: 16-APSK modulation + * @APSK_32: 32-APSK modulation + * @DQPSK: DQPSK modulation + * @QAM_4_NR: 4-QAM-NR modulation + * + * Please note that not all modulations are supported by a given standard. + * + */ enum fe_modulation { QPSK, QAM_16, @@ -176,6 +351,32 @@ enum fe_modulation { QAM_4_NR, }; +/** + * enum fe_transmit_mode - Transmission mode + * + * @TRANSMISSION_MODE_AUTO: + * Autodetect transmission mode. The hardware will try to find the + * correct FFT-size (if capable) to fill in the missing parameters. + * @TRANSMISSION_MODE_1K: + * Transmission mode 1K + * @TRANSMISSION_MODE_2K: + * Transmission mode 2K + * @TRANSMISSION_MODE_8K: + * Transmission mode 8K + * @TRANSMISSION_MODE_4K: + * Transmission mode 4K + * @TRANSMISSION_MODE_16K: + * Transmission mode 16K + * @TRANSMISSION_MODE_32K: + * Transmission mode 32K + * @TRANSMISSION_MODE_C1: + * Single Carrier (C=1) transmission mode (DTMB only) + * @TRANSMISSION_MODE_C3780: + * Multi Carrier (C=3780) transmission mode (DTMB only) + * + * Please note that not all transmission modes are supported by a given + * standard. + */ enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, @@ -188,6 +389,23 @@ enum fe_transmit_mode { TRANSMISSION_MODE_C3780, }; +/** + * enum fe_guard_interval - Guard interval + * + * @GUARD_INTERVAL_AUTO: Autodetect the guard interval + * @GUARD_INTERVAL_1_128: Guard interval 1/128 + * @GUARD_INTERVAL_1_32: Guard interval 1/32 + * @GUARD_INTERVAL_1_16: Guard interval 1/16 + * @GUARD_INTERVAL_1_8: Guard interval 1/8 + * @GUARD_INTERVAL_1_4: Guard interval 1/4 + * @GUARD_INTERVAL_19_128: Guard interval 19/128 + * @GUARD_INTERVAL_19_256: Guard interval 19/256 + * @GUARD_INTERVAL_PN420: PN length 420 (1/4) + * @GUARD_INTERVAL_PN595: PN length 595 (1/6) + * @GUARD_INTERVAL_PN945: PN length 945 (1/9) + * + * Please note that not all guard intervals are supported by a given standard. + */ enum fe_guard_interval { GUARD_INTERVAL_1_32, GUARD_INTERVAL_1_16, @@ -202,6 +420,16 @@ enum fe_guard_interval { GUARD_INTERVAL_PN945, }; +/** + * enum fe_hierarchy - Hierarchy + * @HIERARCHY_NONE: No hierarchy + * @HIERARCHY_AUTO: Autodetect hierarchy (if supported) + * @HIERARCHY_1: Hierarchy 1 + * @HIERARCHY_2: Hierarchy 2 + * @HIERARCHY_4: Hierarchy 4 + * + * Please note that not all hierarchy types are supported by a given standard. + */ enum fe_hierarchy { HIERARCHY_NONE, HIERARCHY_1, @@ -210,6 +438,15 @@ enum fe_hierarchy { HIERARCHY_AUTO }; +/** + * enum fe_interleaving - Interleaving + * @INTERLEAVING_NONE: No interleaving. + * @INTERLEAVING_AUTO: Auto-detect interleaving. + * @INTERLEAVING_240: Interleaving of 240 symbols. + * @INTERLEAVING_720: Interleaving of 720 symbols. + * + * Please note that, currently, only DTMB uses it. + */ enum fe_interleaving { INTERLEAVING_NONE, INTERLEAVING_AUTO, @@ -217,7 +454,8 @@ enum fe_interleaving { INTERLEAVING_720, }; -/* S2API Commands */ +/* DVBv5 property Commands */ + #define DTV_UNDEFINED 0 #define DTV_TUNE 1 #define DTV_CLEAR 2 @@ -310,19 +548,79 @@ enum fe_interleaving { #define DTV_MAX_COMMAND DTV_STAT_TOTAL_BLOCK_COUNT +/** + * enum fe_pilot - Type of pilot tone + * + * @PILOT_ON: Pilot tones enabled + * @PILOT_OFF: Pilot tones disabled + * @PILOT_AUTO: Autodetect pilot tones + */ enum fe_pilot { PILOT_ON, PILOT_OFF, PILOT_AUTO, }; +/** + * enum fe_rolloff - Rolloff factor (also known as alpha) + * @ROLLOFF_35: Roloff factor: 35% + * @ROLLOFF_20: Roloff factor: 20% + * @ROLLOFF_25: Roloff factor: 25% + * @ROLLOFF_AUTO: Auto-detect the roloff factor. + * + * .. note: + * + * Roloff factor of 35% is implied on DVB-S. On DVB-S2, it is default. + */ enum fe_rolloff { - ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */ + ROLLOFF_35, ROLLOFF_20, ROLLOFF_25, ROLLOFF_AUTO, }; +/** + * enum fe_delivery_system - Type of the delivery system + * + * @SYS_UNDEFINED: + * Undefined standard. Generally, indicates an error + * @SYS_DVBC_ANNEX_A: + * Cable TV: DVB-C following ITU-T J.83 Annex A spec + * @SYS_DVBC_ANNEX_B: + * Cable TV: DVB-C following ITU-T J.83 Annex B spec (ClearQAM) + * @SYS_DVBC_ANNEX_C: + * Cable TV: DVB-C following ITU-T J.83 Annex C spec + * @SYS_ISDBC: + * Cable TV: ISDB-C (no drivers yet) + * @SYS_DVBT: + * Terrestrial TV: DVB-T + * @SYS_DVBT2: + * Terrestrial TV: DVB-T2 + * @SYS_ISDBT: + * Terrestrial TV: ISDB-T + * @SYS_ATSC: + * Terrestrial TV: ATSC + * @SYS_ATSCMH: + * Terrestrial TV (mobile): ATSC-M/H + * @SYS_DTMB: + * Terrestrial TV: DTMB + * @SYS_DVBS: + * Satellite TV: DVB-S + * @SYS_DVBS2: + * Satellite TV: DVB-S2 + * @SYS_TURBO: + * Satellite TV: DVB-S Turbo + * @SYS_ISDBS: + * Satellite TV: ISDB-S + * @SYS_DAB: + * Digital audio: DAB (not fully supported) + * @SYS_DSS: + * Satellite TV: DSS (not fully supported) + * @SYS_CMMB: + * Terrestrial TV (mobile): CMMB (not fully supported) + * @SYS_DVBH: + * Terrestrial TV (mobile): DVB-H (standard deprecated) + */ enum fe_delivery_system { SYS_UNDEFINED, SYS_DVBC_ANNEX_A, @@ -345,35 +643,85 @@ enum fe_delivery_system { SYS_DVBC_ANNEX_C, }; -/* backward compatibility */ +/* backward compatibility definitions for delivery systems */ #define SYS_DVBC_ANNEX_AC SYS_DVBC_ANNEX_A -#define SYS_DMBTH SYS_DTMB /* DMB-TH is legacy name, use DTMB instead */ +#define SYS_DMBTH SYS_DTMB /* DMB-TH is legacy name, use DTMB */ -/* ATSC-MH */ +/* ATSC-MH specific parameters */ +/** + * enum atscmh_sccc_block_mode - Type of Series Concatenated Convolutional + * Code Block Mode. + * + * @ATSCMH_SCCC_BLK_SEP: + * Separate SCCC: the SCCC outer code mode shall be set independently + * for each Group Region (A, B, C, D) + * @ATSCMH_SCCC_BLK_COMB: + * Combined SCCC: all four Regions shall have the same SCCC outer + * code mode. + * @ATSCMH_SCCC_BLK_RES: + * Reserved. Shouldn't be used. + */ enum atscmh_sccc_block_mode { ATSCMH_SCCC_BLK_SEP = 0, ATSCMH_SCCC_BLK_COMB = 1, ATSCMH_SCCC_BLK_RES = 2, }; +/** + * enum atscmh_sccc_code_mode - Type of Series Concatenated Convolutional + * Code Rate. + * + * @ATSCMH_SCCC_CODE_HLF: + * The outer code rate of a SCCC Block is 1/2 rate. + * @ATSCMH_SCCC_CODE_QTR: + * The outer code rate of a SCCC Block is 1/4 rate. + * @ATSCMH_SCCC_CODE_RES: + * Reserved. Should not be used. + */ enum atscmh_sccc_code_mode { ATSCMH_SCCC_CODE_HLF = 0, ATSCMH_SCCC_CODE_QTR = 1, ATSCMH_SCCC_CODE_RES = 2, }; +/** + * enum atscmh_rs_frame_ensemble - Reed Solomon(RS) frame ensemble. + * + * @ATSCMH_RSFRAME_ENS_PRI: Primary Ensemble. + * @ATSCMH_RSFRAME_ENS_SEC: Secondary Ensemble. + */ enum atscmh_rs_frame_ensemble { ATSCMH_RSFRAME_ENS_PRI = 0, ATSCMH_RSFRAME_ENS_SEC = 1, }; +/** + * enum atscmh_rs_frame_mode - Reed Solomon (RS) frame mode. + * + * @ATSCMH_RSFRAME_PRI_ONLY: + * Single Frame: There is only a primary RS Frame for all Group + * Regions. + * @ATSCMH_RSFRAME_PRI_SEC: + * Dual Frame: There are two separate RS Frames: Primary RS Frame for + * Group Region A and B and Secondary RS Frame for Group Region C and + * D. + * @ATSCMH_RSFRAME_RES: + * Reserved. Shouldn't be used. + */ enum atscmh_rs_frame_mode { ATSCMH_RSFRAME_PRI_ONLY = 0, ATSCMH_RSFRAME_PRI_SEC = 1, ATSCMH_RSFRAME_RES = 2, }; +/** + * enum atscmh_rs_code_mode + * @ATSCMH_RSCODE_211_187: Reed Solomon code (211,187). + * @ATSCMH_RSCODE_223_187: Reed Solomon code (223,187). + * @ATSCMH_RSCODE_235_187: Reed Solomon code (235,187). + * @ATSCMH_RSCODE_RES: Reserved. Shouldn't be used. + */ enum atscmh_rs_code_mode { ATSCMH_RSCODE_211_187 = 0, ATSCMH_RSCODE_223_187 = 1, @@ -385,16 +733,17 @@ enum atscmh_rs_code_mode { #define LNA_AUTO (~0U) /** - * Scale types for the quality parameters. + * enum fecap_scale_params - scale types for the quality parameters. + * * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That * could indicate a temporary or a permanent * condition. * @FE_SCALE_DECIBEL: The scale is measured in 0.001 dB steps, typically - * used on signal measures. + * used on signal measures. * @FE_SCALE_RELATIVE: The scale is a relative percentual measure, - * ranging from 0 (0%) to 0xffff (100%). + * ranging from 0 (0%) to 0xffff (100%). * @FE_SCALE_COUNTER: The scale counts the occurrence of an event, like - * bit error, block error, lapsed time. + * bit error, block error, lapsed time. */ enum fecap_scale_params { FE_SCALE_NOT_AVAILABLE = 0, @@ -406,24 +755,38 @@ enum fecap_scale_params { /** * struct dtv_stats - Used for reading a DTV status property * - * @value: value of the measure. Should range from 0 to 0xffff; * @scale: Filled with enum fecap_scale_params - the scale * in usage for that parameter * + * The ``{unnamed_union}`` may have either one of the values below: + * + * %svalue + * integer value of the measure, for %FE_SCALE_DECIBEL, + * used for dB measures. The unit is 0.001 dB. + * + * %uvalue + * unsigned integer value of the measure, used when @scale is + * either %FE_SCALE_RELATIVE or %FE_SCALE_COUNTER. + * * For most delivery systems, this will return a single value for each * parameter. + * * It should be noticed, however, that new OFDM delivery systems like * ISDB can use different modulation types for each group of carriers. * On such standards, up to 8 groups of statistics can be provided, one * for each carrier group (called "layer" on ISDB). + * * In order to be consistent with other delivery systems, the first * value refers to the entire set of carriers ("global"). - * dtv_status:scale should use the value FE_SCALE_NOT_AVAILABLE when + * + * @scale should use the value %FE_SCALE_NOT_AVAILABLE when * the value for the entire group of carriers or from one specific layer * is not provided by the hardware. - * st.len should be filled with the latest filled status + 1. * - * In other words, for ISDB, those values should be filled like: + * @len should be filled with the latest filled status + 1. + * + * In other words, for ISDB, those values should be filled like:: + * * u.st.stat.svalue[0] = global statistics; * u.st.stat.scale[0] = FE_SCALE_DECIBEL; * u.st.stat.value[1] = layer A statistics; @@ -445,11 +808,39 @@ struct dtv_stats { #define MAX_DTV_STATS 4 +/** + * struct dtv_fe_stats - store Digital TV frontend statistics + * + * @len: length of the statistics - if zero, stats is disabled. + * @stat: array with digital TV statistics. + * + * On most standards, @len can either be 0 or 1. However, for ISDB, each + * layer is modulated in separate. So, each layer may have its own set + * of statistics. If so, stat[0] carries on a global value for the property. + * Indexes 1 to 3 means layer A to B. + */ struct dtv_fe_stats { __u8 len; struct dtv_stats stat[MAX_DTV_STATS]; } __attribute__ ((packed)); +/** + * struct dtv_property - store one of frontend command and its value + * + * @cmd: Digital TV command. + * @reserved: Not used. + * @u: Union with the values for the command. + * @result: Result of the command set (currently unused). + * + * The @u union may have either one of the values below: + * + * %data + * an unsigned 32-bits number. + * %st + * a &struct dtv_fe_stats array of statistics. + * %buffer + * a buffer of up to 32 characters (currently unused). + */ struct dtv_property { __u32 cmd; __u32 reserved[3]; @@ -469,17 +860,70 @@ struct dtv_property { /* num of properties cannot exceed DTV_IOCTL_MAX_MSGS per ioctl */ #define DTV_IOCTL_MAX_MSGS 64 +/** + * struct dtv_properties - a set of command/value pairs. + * + * @num: amount of commands stored at the struct. + * @props: a pointer to &struct dtv_property. + */ struct dtv_properties { __u32 num; struct dtv_property *props; }; +/* + * When set, this flag will disable any zigzagging or other "normal" tuning + * behavior. Additionally, there will be no automatic monitoring of the lock + * status, and hence no frontend events will be generated. If a frontend device + * is closed, this flag will be automatically turned off when the device is + * reopened read-write. + */ +#define FE_TUNE_MODE_ONESHOT 0x01 + +/* Digital TV Frontend API calls */ + +#define FE_GET_INFO _IOR('o', 61, struct dvb_frontend_info) + +#define FE_DISEQC_RESET_OVERLOAD _IO('o', 62) +#define FE_DISEQC_SEND_MASTER_CMD _IOW('o', 63, struct dvb_diseqc_master_cmd) +#define FE_DISEQC_RECV_SLAVE_REPLY _IOR('o', 64, struct dvb_diseqc_slave_reply) +#define FE_DISEQC_SEND_BURST _IO('o', 65) /* fe_sec_mini_cmd_t */ + +#define FE_SET_TONE _IO('o', 66) /* fe_sec_tone_mode_t */ +#define FE_SET_VOLTAGE _IO('o', 67) /* fe_sec_voltage_t */ +#define FE_ENABLE_HIGH_LNB_VOLTAGE _IO('o', 68) /* int */ + +#define FE_READ_STATUS _IOR('o', 69, fe_status_t) +#define FE_READ_BER _IOR('o', 70, __u32) +#define FE_READ_SIGNAL_STRENGTH _IOR('o', 71, __u16) +#define FE_READ_SNR _IOR('o', 72, __u16) +#define FE_READ_UNCORRECTED_BLOCKS _IOR('o', 73, __u32) + +#define FE_SET_FRONTEND_TUNE_MODE _IO('o', 81) /* unsigned int */ +#define FE_GET_EVENT _IOR('o', 78, struct dvb_frontend_event) + +#define FE_DISHNETWORK_SEND_LEGACY_CMD _IO('o', 80) /* unsigned int */ + +#define FE_SET_PROPERTY _IOW('o', 82, struct dtv_properties) +#define FE_GET_PROPERTY _IOR('o', 83, struct dtv_properties) + #if defined(__DVB_CORE__) || !defined (__KERNEL__) /* - * DEPRECATED: The DVBv3 ioctls, structs and enums should not be used on - * newer programs, as it doesn't support the second generation of digital - * TV standards, nor supports newer delivery systems. + * DEPRECATED: Everything below is deprecated in favor of DVBv5 API + * + * The DVBv3 only ioctls, structs and enums should not be used on + * newer programs, as it doesn't support the second generation of + * digital TV standards, nor supports newer delivery systems. + * They also don't support modern frontends with usually support multiple + * delivery systems. + * + * Drivers shouldn't use them. + * + * New applications should use DVBv5 delivery system instead + */ + +/* */ enum fe_bandwidth { @@ -492,7 +936,7 @@ enum fe_bandwidth { BANDWIDTH_1_712_MHZ, }; -/* This is needed for legacy userspace support */ +/* This is kept for legacy userspace support */ typedef enum fe_sec_voltage fe_sec_voltage_t; typedef enum fe_caps fe_caps_t; typedef enum fe_type fe_type_t; @@ -510,6 +954,8 @@ typedef enum fe_pilot fe_pilot_t; typedef enum fe_rolloff fe_rolloff_t; typedef enum fe_delivery_system fe_delivery_system_t; +/* DVBv3 structs */ + struct dvb_qpsk_parameters { __u32 symbol_rate; /* symbol rate in Symbols per second */ fe_code_rate_t fec_inner; /* forward error correction (see above) */ @@ -551,42 +997,12 @@ struct dvb_frontend_event { fe_status_t status; struct dvb_frontend_parameters parameters; }; -#endif - -#define FE_SET_PROPERTY _IOW('o', 82, struct dtv_properties) -#define FE_GET_PROPERTY _IOR('o', 83, struct dtv_properties) - -/** - * When set, this flag will disable any zigzagging or other "normal" tuning - * behaviour. Additionally, there will be no automatic monitoring of the lock - * status, and hence no frontend events will be generated. If a frontend device - * is closed, this flag will be automatically turned off when the device is - * reopened read-write. - */ -#define FE_TUNE_MODE_ONESHOT 0x01 -#define FE_GET_INFO _IOR('o', 61, struct dvb_frontend_info) - -#define FE_DISEQC_RESET_OVERLOAD _IO('o', 62) -#define FE_DISEQC_SEND_MASTER_CMD _IOW('o', 63, struct dvb_diseqc_master_cmd) -#define FE_DISEQC_RECV_SLAVE_REPLY _IOR('o', 64, struct dvb_diseqc_slave_reply) -#define FE_DISEQC_SEND_BURST _IO('o', 65) /* fe_sec_mini_cmd_t */ - -#define FE_SET_TONE _IO('o', 66) /* fe_sec_tone_mode_t */ -#define FE_SET_VOLTAGE _IO('o', 67) /* fe_sec_voltage_t */ -#define FE_ENABLE_HIGH_LNB_VOLTAGE _IO('o', 68) /* int */ - -#define FE_READ_STATUS _IOR('o', 69, fe_status_t) -#define FE_READ_BER _IOR('o', 70, __u32) -#define FE_READ_SIGNAL_STRENGTH _IOR('o', 71, __u16) -#define FE_READ_SNR _IOR('o', 72, __u16) -#define FE_READ_UNCORRECTED_BLOCKS _IOR('o', 73, __u32) +/* DVBv3 API calls */ #define FE_SET_FRONTEND _IOW('o', 76, struct dvb_frontend_parameters) #define FE_GET_FRONTEND _IOR('o', 77, struct dvb_frontend_parameters) -#define FE_SET_FRONTEND_TUNE_MODE _IO('o', 81) /* unsigned int */ -#define FE_GET_EVENT _IOR('o', 78, struct dvb_frontend_event) -#define FE_DISHNETWORK_SEND_LEGACY_CMD _IO('o', 80) /* unsigned int */ +#endif #endif /*_DVBFRONTEND_H_*/ -- cgit From 9d5e27cbc117671959a9f625e51c754f5a0666e3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 30 Aug 2017 13:45:20 -0400 Subject: media: dvb frontend docs: use kernel-doc documentation Now that frontend.h contains most documentation for the frontend, remove the duplicated information from Documentation/ and use the kernel-doc auto-generated one instead. That should simplify maintainership of DVB frontend uAPI, as most of the documentation will stick with the header file. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 16a318fc469a..e7c29d0bdee4 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -562,10 +562,10 @@ enum fe_pilot { }; /** - * enum fe_rolloff - Rolloff factor (also known as alpha) - * @ROLLOFF_35: Roloff factor: 35% - * @ROLLOFF_20: Roloff factor: 20% - * @ROLLOFF_25: Roloff factor: 25% + * enum fe_rolloff - Rolloff factor + * @ROLLOFF_35: Roloff factor: α=35% + * @ROLLOFF_20: Roloff factor: α=20% + * @ROLLOFF_25: Roloff factor: α=25% * @ROLLOFF_AUTO: Auto-detect the roloff factor. * * .. note: -- cgit From 791edca5685b26d4575e59f5420ba3e206f5cebb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 31 Aug 2017 12:52:45 -0400 Subject: media: dmx.h: get rid of unused DMX_KERNEL_CLIENT There's a flag defined for Digital TV demux that is not used anywhere, called DMX_KERNEL_CLIENT. Get rid of it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 1bc4d6fb0f01..1702f923d425 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -103,7 +103,6 @@ struct dmx_sct_filter_params #define DMX_CHECK_CRC 1 #define DMX_ONESHOT 2 #define DMX_IMMEDIATE_START 4 -#define DMX_KERNEL_CLIENT 0x8000 }; -- cgit From 286fe1ca3fa1b6fcc7ce8695b7c8d681e6e1c3b7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 31 Aug 2017 14:11:34 -0400 Subject: media: dmx.h: get rid of DMX_GET_CAPS There's no driver currently using it; it is also not documented about what it would be supposed to do. So, get rid of it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 1702f923d425..db8bd00c93de 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -115,11 +115,6 @@ struct dmx_pes_filter_params __u32 flags; }; -struct dmx_caps { - __u32 caps; - int num_decoders; -}; - enum dmx_source { DMX_SOURCE_FRONT0 = 0, DMX_SOURCE_FRONT1, @@ -143,7 +138,6 @@ struct dmx_stc { #define DMX_SET_PES_FILTER _IOW('o', 44, struct dmx_pes_filter_params) #define DMX_SET_BUFFER_SIZE _IO('o', 45) #define DMX_GET_PES_PIDS _IOR('o', 47, __u16[5]) -#define DMX_GET_CAPS _IOR('o', 48, struct dmx_caps) #define DMX_SET_SOURCE _IOW('o', 49, enum dmx_source) #define DMX_GET_STC _IOWR('o', 50, struct dmx_stc) #define DMX_ADD_PID _IOW('o', 51, __u16) @@ -156,7 +150,6 @@ typedef enum dmx_output dmx_output_t; typedef enum dmx_input dmx_input_t; typedef enum dmx_ts_pes dmx_pes_type_t; typedef struct dmx_filter dmx_filter_t; -typedef struct dmx_caps dmx_caps_t; typedef enum dmx_source dmx_source_t; #endif -- cgit From 13adefbe9e566c6db91579e4ce17f1e5193d6f2c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 31 Aug 2017 14:21:43 -0400 Subject: media: dmx.h: get rid of DMX_SET_SOURCE No driver uses this ioctl, nor it is documented anywhere. So, get rid of it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index db8bd00c93de..08dc17060321 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -115,16 +115,6 @@ struct dmx_pes_filter_params __u32 flags; }; -enum dmx_source { - DMX_SOURCE_FRONT0 = 0, - DMX_SOURCE_FRONT1, - DMX_SOURCE_FRONT2, - DMX_SOURCE_FRONT3, - DMX_SOURCE_DVR0 = 16, - DMX_SOURCE_DVR1, - DMX_SOURCE_DVR2, - DMX_SOURCE_DVR3 -}; struct dmx_stc { unsigned int num; /* input : which STC? 0..N */ @@ -138,7 +128,6 @@ struct dmx_stc { #define DMX_SET_PES_FILTER _IOW('o', 44, struct dmx_pes_filter_params) #define DMX_SET_BUFFER_SIZE _IO('o', 45) #define DMX_GET_PES_PIDS _IOR('o', 47, __u16[5]) -#define DMX_SET_SOURCE _IOW('o', 49, enum dmx_source) #define DMX_GET_STC _IOWR('o', 50, struct dmx_stc) #define DMX_ADD_PID _IOW('o', 51, __u16) #define DMX_REMOVE_PID _IOW('o', 52, __u16) @@ -150,7 +139,6 @@ typedef enum dmx_output dmx_output_t; typedef enum dmx_input dmx_input_t; typedef enum dmx_ts_pes dmx_pes_type_t; typedef struct dmx_filter dmx_filter_t; -typedef enum dmx_source dmx_source_t; #endif -- cgit From bb98e6d280e00a1180f47d3391ee0bd1f312b5f6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 31 Aug 2017 12:28:52 -0400 Subject: media: dmx.h: add kernel-doc markups and use it at Documentation/ The demux documentation is pretty poor nowadays: most of the structs and enums aren't documented at all. Add proper kernel-doc markups for them and use it. Now, the demux API data structures are fully documented :-) Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 139 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 114 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 08dc17060321..4e3f3a2fe83f 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -32,26 +32,74 @@ #define DMX_FILTER_SIZE 16 -enum dmx_output -{ - DMX_OUT_DECODER, /* Streaming directly to decoder. */ - DMX_OUT_TAP, /* Output going to a memory buffer */ - /* (to be retrieved via the read command).*/ - DMX_OUT_TS_TAP, /* Output multiplexed into a new TS */ - /* (to be retrieved by reading from the */ - /* logical DVR device). */ - DMX_OUT_TSDEMUX_TAP /* Like TS_TAP but retrieved from the DMX device */ +/** + * enum dmx_output - Output for the demux. + * + * @DMX_OUT_DECODER: + * Streaming directly to decoder. + * @DMX_OUT_TAP: + * Output going to a memory buffer (to be retrieved via the read command). + * Delivers the stream output to the demux device on which the ioctl + * is called. + * @DMX_OUT_TS_TAP: + * Output multiplexed into a new TS (to be retrieved by reading from the + * logical DVR device). Routes output to the logical DVR device + * ``/dev/dvb/adapter?/dvr?``, which delivers a TS multiplexed from all + * filters for which @DMX_OUT_TS_TAP was specified. + * @DMX_OUT_TSDEMUX_TAP: + * Like @DMX_OUT_TS_TAP but retrieved from the DMX device. + */ +enum dmx_output { + DMX_OUT_DECODER, + DMX_OUT_TAP, + DMX_OUT_TS_TAP, + DMX_OUT_TSDEMUX_TAP }; -enum dmx_input -{ - DMX_IN_FRONTEND, /* Input from a front-end device. */ - DMX_IN_DVR /* Input from the logical DVR device. */ + +/** + * enum dmx_input - Input from the demux. + * + * @DMX_IN_FRONTEND: Input from a front-end device. + * @DMX_IN_DVR: Input from the logical DVR device. + */ +enum dmx_input { + DMX_IN_FRONTEND, + DMX_IN_DVR }; +/** + * enum dmx_ts_pes - type of the PES filter. + * + * @DMX_PES_AUDIO0: first audio PID. Also referred as @DMX_PES_AUDIO. + * @DMX_PES_VIDEO0: first video PID. Also referred as @DMX_PES_VIDEO. + * @DMX_PES_TELETEXT0: first teletext PID. Also referred as @DMX_PES_TELETEXT. + * @DMX_PES_SUBTITLE0: first subtitle PID. Also referred as @DMX_PES_SUBTITLE. + * @DMX_PES_PCR0: first Program Clock Reference PID. + * Also referred as @DMX_PES_PCR. + * + * @DMX_PES_AUDIO1: second audio PID. + * @DMX_PES_VIDEO1: second video PID. + * @DMX_PES_TELETEXT1: second teletext PID. + * @DMX_PES_SUBTITLE1: second subtitle PID. + * @DMX_PES_PCR1: second Program Clock Reference PID. + * + * @DMX_PES_AUDIO2: third audio PID. + * @DMX_PES_VIDEO2: third video PID. + * @DMX_PES_TELETEXT2: third teletext PID. + * @DMX_PES_SUBTITLE2: third subtitle PID. + * @DMX_PES_PCR2: third Program Clock Reference PID. + * + * @DMX_PES_AUDIO3: fourth audio PID. + * @DMX_PES_VIDEO3: fourth video PID. + * @DMX_PES_TELETEXT3: fourth teletext PID. + * @DMX_PES_SUBTITLE3: fourth subtitle PID. + * @DMX_PES_PCR3: fourth Program Clock Reference PID. + * + * @DMX_PES_OTHER: any other PID. + */ -enum dmx_ts_pes -{ +enum dmx_ts_pes { DMX_PES_AUDIO0, DMX_PES_VIDEO0, DMX_PES_TELETEXT0, @@ -86,16 +134,42 @@ enum dmx_ts_pes #define DMX_PES_PCR DMX_PES_PCR0 -struct dmx_filter -{ + +/** + * struct dmx_filter - Specifies a section header filter. + * + * @filter: bit array with bits to be matched at the section header. + * @mask: bits that are valid at the filter bit array. + * @mode: mode of match: if bit is zero, it will match if equal (positive + * match); if bit is one, it will match if the bit is negated. + * + * Note: All arrays in this struct have a size of DMX_FILTER_SIZE (16 bytes). + */ +struct dmx_filter { __u8 filter[DMX_FILTER_SIZE]; __u8 mask[DMX_FILTER_SIZE]; __u8 mode[DMX_FILTER_SIZE]; }; - -struct dmx_sct_filter_params -{ +/** + * struct dmx_sct_filter_params - Specifies a section filter. + * + * @pid: PID to be filtered. + * @filter: section header filter, as defined by &struct dmx_filter. + * @timeout: maximum time to filter, in milliseconds. + * @flags: extra flags for the section filter. + * + * Carries the configuration for a MPEG-TS section filter. + * + * The @flags can be: + * + * - %DMX_CHECK_CRC - only deliver sections where the CRC check succeeded; + * - %DMX_ONESHOT - disable the section filter after one section + * has been delivered; + * - %DMX_IMMEDIATE_START - Start filter immediately without requiring a + * :ref:`DMX_START`. + */ +struct dmx_sct_filter_params { __u16 pid; struct dmx_filter filter; __u32 timeout; @@ -105,7 +179,16 @@ struct dmx_sct_filter_params #define DMX_IMMEDIATE_START 4 }; - +/** + * struct dmx_pes_filter_params - Specifies Packetized Elementary Stream (PES) + * filter parameters. + * + * @pid: PID to be filtered. + * @input: Demux input, as specified by &enum dmx_input. + * @output: Demux output, as specified by &enum dmx_output. + * @pes_type: Type of the pes filter, as specified by &enum dmx_pes_type. + * @flags: Demux PES flags. + */ struct dmx_pes_filter_params { __u16 pid; @@ -115,11 +198,17 @@ struct dmx_pes_filter_params __u32 flags; }; - +/** + * struct dmx_stc - Stores System Time Counter (STC) information. + * + * @num: input data: number of the STC, from 0 to N. + * @base: output: divisor for STC to get 90 kHz clock. + * @stc: output: stc in @base * 90 kHz units. + */ struct dmx_stc { - unsigned int num; /* input : which STC? 0..N */ - unsigned int base; /* output: divisor for stc to get 90 kHz clock */ - __u64 stc; /* output: stc in 'base'*90 kHz units */ + unsigned int num; + unsigned int base; + __u64 stc; }; #define DMX_START _IO('o', 41) -- cgit From 833ff5e7feda1a042b83e82208cef3d212ca0ef1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 07:41:49 -0400 Subject: media: ca.h: get rid of CA_SET_PID This ioctl seems to be some attempt to support a feature at the bt8xx dst_ca driver. Yet, as said there, it "needs more work". Right now, the code there is just a boilerplate. At the end of the day, no driver uses this ioctl, nor it is documented anywhere (except for "needs more work"). So, get rid of it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index 00cf24587bea..859f6c0c4751 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -73,11 +73,6 @@ struct ca_descr { unsigned char cw[8]; }; -struct ca_pid { - unsigned int pid; - int index; /* -1 == disable*/ -}; - #define CA_RESET _IO('o', 128) #define CA_GET_CAP _IOR('o', 129, struct ca_caps) #define CA_GET_SLOT_INFO _IOR('o', 130, struct ca_slot_info) @@ -85,7 +80,6 @@ struct ca_pid { #define CA_GET_MSG _IOR('o', 132, struct ca_msg) #define CA_SEND_MSG _IOW('o', 133, struct ca_msg) #define CA_SET_DESCR _IOW('o', 134, struct ca_descr) -#define CA_SET_PID _IOW('o', 135, struct ca_pid) #if !defined (__KERNEL__) @@ -95,7 +89,6 @@ typedef struct ca_descr_info ca_descr_info_t; typedef struct ca_caps ca_caps_t; typedef struct ca_msg ca_msg_t; typedef struct ca_descr ca_descr_t; -typedef struct ca_pid ca_pid_t; #endif -- cgit From fed7c4fe8bd0b131cc3f19ba2744061935cdcdb7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 07:48:02 -0400 Subject: media: ca.h: document most CA data types For most of the stuff there, documenting is easy, as the header file contains information. Yet, I was unable to document two data structs: ca_msg and ca_descr As those two structs are used by a few drivers, keep them. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 78 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index 859f6c0c4751..7ee641b4124c 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -24,39 +24,81 @@ #ifndef _DVBCA_H_ #define _DVBCA_H_ -/* slot interface types and info */ +/** + * struct ca_slot_info - CA slot interface types and info. + * + * @num: slot number. + * @type: slot type. + * @flags: flags applicable to the slot. + * + * This struct stores the CA slot information. + * + * @type can be: + * + * - %CA_CI - CI high level interface; + * - %CA_CI_LINK - CI link layer level interface; + * - %CA_CI_PHYS - CI physical layer level interface; + * - %CA_DESCR - built-in descrambler; + * - %CA_SC -simple smart card interface. + * + * @flags can be: + * + * - %CA_CI_MODULE_PRESENT - module (or card) inserted; + * - %CA_CI_MODULE_READY - module is ready for usage. + */ struct ca_slot_info { - int num; /* slot number */ - - int type; /* CA interface this slot supports */ -#define CA_CI 1 /* CI high level interface */ -#define CA_CI_LINK 2 /* CI link layer level interface */ -#define CA_CI_PHYS 4 /* CI physical layer level interface */ -#define CA_DESCR 8 /* built-in descrambler */ -#define CA_SC 128 /* simple smart card interface */ + int num; + int type; +#define CA_CI 1 +#define CA_CI_LINK 2 +#define CA_CI_PHYS 4 +#define CA_DESCR 8 +#define CA_SC 128 unsigned int flags; -#define CA_CI_MODULE_PRESENT 1 /* module (or card) inserted */ +#define CA_CI_MODULE_PRESENT 1 #define CA_CI_MODULE_READY 2 }; -/* descrambler types and info */ - +/** + * struct ca_descr_info - descrambler types and info. + * + * @num: number of available descramblers (keys). + * @type: type of supported scrambling system. + * + * Identifies the number of descramblers and their type. + * + * @type can be: + * + * - %CA_ECD - European Common Descrambler (ECD) hardware; + * - %CA_NDS - Videoguard (NDS) hardware; + * - %CA_DSS - Distributed Sample Scrambling (DSS) hardware. + */ struct ca_descr_info { - unsigned int num; /* number of available descramblers (keys) */ - unsigned int type; /* type of supported scrambling system */ + unsigned int num; + unsigned int type; #define CA_ECD 1 #define CA_NDS 2 #define CA_DSS 4 }; +/** + * struct ca_caps - CA slot interface capabilities. + * + * @slot_num: total number of CA card and module slots. + * @slot_type: bitmap with all supported types as defined at + * &struct ca_slot_info (e. g. %CA_CI, %CA_CI_LINK, etc). + * @descr_num: total number of descrambler slots (keys) + * @descr_type: bitmap with all supported types as defined at + * &struct ca_descr_info (e. g. %CA_ECD, %CA_NDS, etc). + */ struct ca_caps { - unsigned int slot_num; /* total number of CA card and module slots */ - unsigned int slot_type; /* OR of all supported types */ - unsigned int descr_num; /* total number of descrambler slots (keys) */ - unsigned int descr_type; /* OR of all supported types */ + unsigned int slot_num; + unsigned int slot_type; + unsigned int descr_num; + unsigned int descr_type; }; /* a message to/from a CI-CAM */ -- cgit From 5176d6eefd5d58fbb787f96c2140cffb2e826b17 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 15:05:28 -0400 Subject: media: frontend.h: Avoid the term DVB when doesn't refer to a delivery system The DVB term can either refer to the subsystem or to a delivery system. Avoid it in the first case at the kernel-doc markups. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index e7c29d0bdee4..fc2edb6014fe 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -239,11 +239,11 @@ enum fe_sec_mini_cmd { * @FE_NONE: The frontend doesn't have any kind of lock. * That's the initial frontend status * @FE_HAS_SIGNAL: Has found something above the noise level. - * @FE_HAS_CARRIER: Has found a DVB signal. + * @FE_HAS_CARRIER: Has found a signal. * @FE_HAS_VITERBI: FEC inner coding (Viterbi, LDPC or other inner code). * is stable. * @FE_HAS_SYNC: Synchronization bytes was found. - * @FE_HAS_LOCK: DVB were locked and everything is working. + * @FE_HAS_LOCK: Digital TV were locked and everything is working. * @FE_TIMEDOUT: Fo lock within the last about 2 seconds. * @FE_REINIT: Frontend was reinitialized, application is recommended * to reset DiSEqC, tone and parameters. @@ -269,7 +269,7 @@ enum fe_status { * This parameter indicates if spectral inversion should be presumed or * not. In the automatic setting (``INVERSION_AUTO``) the hardware will try * to figure out the correct setting by itself. If the hardware doesn't - * support, the DVB core will try to lock at the carrier first with + * support, the %dvb_frontend will try to lock at the carrier first with * inversion off. If it fails, it will try to enable inversion. */ enum fe_spectral_inversion { -- cgit From 56d51b65bcc7a5780663abd579fb6f039616b347 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 1 Sep 2017 15:45:47 -0400 Subject: media: net.h: add kernel-doc and use it at Documentation/ As we did with frontend.h, ca.h and dmx.h, move the struct definition to net.h. That should help to keep it updated, as more stuff gets added there. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/net.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/dvb/net.h b/include/uapi/linux/dvb/net.h index f451e7eb0b0b..89d805f9a5a6 100644 --- a/include/uapi/linux/dvb/net.h +++ b/include/uapi/linux/dvb/net.h @@ -26,6 +26,21 @@ #include +/** + * struct dvb_net_if - describes a DVB network interface + * + * @pid: Packet ID (PID) of the MPEG-TS that contains data + * @if_num: number of the Digital TV interface. + * @feedtype: Encapsulation type of the feed. + * + * A MPEG-TS stream may contain packet IDs with IP packages on it. + * This struct describes it, and the type of encoding. + * + * @feedtype can be: + * + * - %DVB_NET_FEEDTYPE_MPE for MPE encoding + * - %DVB_NET_FEEDTYPE_ULE for ULE encoding. + */ struct dvb_net_if { __u16 pid; __u16 if_num; -- cgit From bd9049edc66e13e868f819c39844f60443e70817 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 3 Sep 2017 20:50:17 -0400 Subject: media: ca docs: document CA_SET_DESCR ioctl and structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The av7110 driver uses CA_SET_DESCR to store the descrambler control words at the CA descrambler slots. Document it. Thanks-to: Honza Petrouš Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index 7ee641b4124c..c36fdb8e2733 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -109,9 +109,16 @@ struct ca_msg { unsigned char msg[256]; }; +/** + * struct ca_descr - CA descrambler control words info + * + * @index: CA Descrambler slot + * @parity: control words parity, where 0 means even and 1 means odd + * @cw: CA Descrambler control words + */ struct ca_descr { unsigned int index; - unsigned int parity; /* 0 == even, 1 == odd */ + unsigned int parity; unsigned char cw[8]; }; -- cgit From 7e6854a9bfea9ed6553acd0204da5101c9a2e6a0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 4 Sep 2017 08:03:40 -0400 Subject: media: ca.h: document ca_msg and the corresponding ioctls Usually, CA messages are sent/received via reading/writing at the CA device node. However, two drivers (dst_ca and firedtv-ci) also implement it via ioctls. Apparently, on both cases, the net result is the same. Anyway, let's document it. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index c36fdb8e2733..24fc38efbc2b 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -101,7 +101,16 @@ struct ca_caps { unsigned int descr_type; }; -/* a message to/from a CI-CAM */ +/** + * struct ca_msg - a message to/from a CI-CAM + * + * @index: unused + * @type: unused + * @length: length of the message + * @msg: message + * + * This struct carries a message to be send/received from a CI CA module. + */ struct ca_msg { unsigned int index; unsigned int type; -- cgit From cd91304e7190b4c4802f8e413ab2214b233e0260 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Sep 2017 12:53:11 +0200 Subject: ovl: fix relatime for directories Need to treat non-regular overlayfs files the same as regular files when checking for an atime update. Add a d_real() flag to make it return the upper dentry for all file types. Reported-by: "zhangyi (F)" Signed-off-by: Miklos Szeredi --- include/linux/dcache.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index fd0721e520f4..ed1a7cf6923a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -562,6 +562,9 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +/* d_real() flags */ +#define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */ + /** * d_real - Return the real dentry * @dentry: the dentry to query -- cgit From 17ad4fdd09e1613d7047aa9e00d7de68ad350204 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 Sep 2017 15:57:29 +0100 Subject: drm/i915/perf: Remove __user from u64 in drm_i915_perf_oa_config Sparse complains that these integers from which we form void __user *, and so we don't need the annotation itself inside the uABI. Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20170901145729.21363-2-chris@chris-wilson.co.uk Reviewed-by: Lionel Landwerlin --- include/uapi/drm/i915_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6598fb76d2c2..d8d10d932759 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1509,9 +1509,9 @@ struct drm_i915_perf_oa_config { __u32 n_boolean_regs; __u32 n_flex_regs; - __u64 __user mux_regs_ptr; - __u64 __user boolean_regs_ptr; - __u64 __user flex_regs_ptr; + __u64 mux_regs_ptr; + __u64 boolean_regs_ptr; + __u64 flex_regs_ptr; }; #if defined(__cplusplus) -- cgit From e4faa09b0dae4f8f429922190e9aa99a564ff785 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 5 Sep 2017 07:02:44 -0400 Subject: media: dvb headers: make checkpatch happier Adjust dvb ca.h, dmx.h and frontend.h in order to make checkpatch happier. Now, it only complains about the typedefs, and those are there just to provide backward userspace compatibility. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/ca.h | 2 +- include/uapi/linux/dvb/dmx.h | 5 ++--- include/uapi/linux/dvb/frontend.h | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/dvb/ca.h b/include/uapi/linux/dvb/ca.h index 24fc38efbc2b..cb150029fdff 100644 --- a/include/uapi/linux/dvb/ca.h +++ b/include/uapi/linux/dvb/ca.h @@ -139,7 +139,7 @@ struct ca_descr { #define CA_SEND_MSG _IOW('o', 133, struct ca_msg) #define CA_SET_DESCR _IOW('o', 134, struct ca_descr) -#if !defined (__KERNEL__) +#if !defined(__KERNEL__) /* This is needed for legacy userspace support */ typedef struct ca_slot_info ca_slot_info_t; diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index 4e3f3a2fe83f..4aa5f6a1815a 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -189,8 +189,7 @@ struct dmx_sct_filter_params { * @pes_type: Type of the pes filter, as specified by &enum dmx_pes_type. * @flags: Demux PES flags. */ -struct dmx_pes_filter_params -{ +struct dmx_pes_filter_params { __u16 pid; enum dmx_input input; enum dmx_output output; @@ -221,7 +220,7 @@ struct dmx_stc { #define DMX_ADD_PID _IOW('o', 51, __u16) #define DMX_REMOVE_PID _IOW('o', 52, __u16) -#if !defined (__KERNEL__) +#if !defined(__KERNEL__) /* This is needed for legacy userspace support */ typedef enum dmx_output dmx_output_t; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index fc2edb6014fe..861cacd5711f 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -907,7 +907,7 @@ struct dtv_properties { #define FE_SET_PROPERTY _IOW('o', 82, struct dtv_properties) #define FE_GET_PROPERTY _IOR('o', 83, struct dtv_properties) -#if defined(__DVB_CORE__) || !defined (__KERNEL__) +#if defined(__DVB_CORE__) || !defined(__KERNEL__) /* * DEPRECATED: Everything below is deprecated in favor of DVBv5 API @@ -982,8 +982,8 @@ struct dvb_ofdm_parameters { }; struct dvb_frontend_parameters { - __u32 frequency; /* (absolute) frequency in Hz for DVB-C/DVB-T/ATSC */ - /* intermediate frequency in kHz for DVB-S */ + __u32 frequency; /* (absolute) frequency in Hz for DVB-C/DVB-T/ATSC */ + /* intermediate frequency in kHz for DVB-S */ fe_spectral_inversion_t inversion; union { struct dvb_qpsk_parameters qpsk; /* DVB-S */ -- cgit From 7760e223507b5218d0674b5e2e8dc29fea079a89 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 31 Aug 2017 16:59:06 -0700 Subject: scsi: Remove Scsi_Host.uspace_req_q A patch I wrote myself several years ago removed SCSI target support from the code under drivers/scsi. That patch removed the code that sets uspace_req_q to a non-NULL value. Hence also remove the code that depends on uspace_req_q != NULL. References: commit 066465251303 ("tgt: removal") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/scsi/scsi_host.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index afb04811b7b9..0a804b1a4726 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -691,12 +691,6 @@ struct Scsi_Host { unsigned int prot_capabilities; unsigned char prot_guard_type; - /* - * q used for scsi_tgt msgs, async events or any other requests that - * need to be processed in userspace - */ - struct request_queue *uspace_req_q; - /* legacy crap */ unsigned long base; unsigned long io_port; -- cgit From fbf1c41fc0f4d3574ac2377245efd666c1fa3075 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 3 Sep 2017 01:18:41 +0100 Subject: workqueue: Fix flag collision Commit 0a94efb5acbb ("workqueue: implicit ordered attribute should be overridable") introduced a __WQ_ORDERED_EXPLICIT flag but gave it the same value as __WQ_LEGACY. I don't believe these were intended to mean the same thing, so renumber __WQ_ORDERED_EXPLICIT. Fixes: 0a94efb5acbb ("workqueue: implicit ordered attribute should be ...") Signed-off-by: Ben Hutchings Cc: stable@vger.kernel.org # v4.13 Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index db6dc9dc0482..1c49431f3121 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -323,8 +323,8 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ - __WQ_ORDERED_EXPLICIT = 1 << 18, /* internal: alloc_ordered_workqueue() */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ -- cgit From fd0c88b70060e03a2215259ed29b4b31497ad205 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:05:47 +0200 Subject: net/ncsi: fix ncsi_vlan_rx_{add,kill}_vid references We get a new link error in allmodconfig kernels after ftgmac100 started using the ncsi helpers: ERROR: "ncsi_vlan_rx_kill_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! ERROR: "ncsi_vlan_rx_add_vid" [drivers/net/ethernet/faraday/ftgmac100.ko] undefined! Related to that, we get another error when CONFIG_NET_NCSI is disabled: drivers/net/ethernet/faraday/ftgmac100.c:1626:25: error: 'ncsi_vlan_rx_add_vid' undeclared here (not in a function); did you mean 'ncsi_start_dev'? drivers/net/ethernet/faraday/ftgmac100.c:1627:26: error: 'ncsi_vlan_rx_kill_vid' undeclared here (not in a function); did you mean 'ncsi_vlan_rx_add_vid'? This fixes both problems at once, using a 'static inline' stub helper for the disabled case, and exporting the functions when they are present. Fixes: 51564585d8c6 ("ftgmac100: Support NCSI VLAN filtering when available") Fixes: 21acf63013ed ("net/ncsi: Configure VLAN tag filter") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/ncsi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1f96af46df49..fdc60ff2511d 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -36,6 +36,16 @@ int ncsi_start_dev(struct ncsi_dev *nd); void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ +static inline int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + +static inline int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + return -EINVAL; +} + static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)) { -- cgit From 2c08ab3f2504bc7ba816ce6fde051b8bd5f028e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:31:35 +0200 Subject: soc: ti/knav_dma: include dmaengine header A header file cleanup apparently caused a build regression with one driver using the knav infrastructure: In file included from drivers/net/ethernet/ti/netcp_core.c:30:0: include/linux/soc/ti/knav_dma.h:129:30: error: field 'direction' has incomplete type enum dma_transfer_direction direction; ^~~~~~~~~ drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_txpipe_open': drivers/net/ethernet/ti/netcp_core.c:1349:21: error: 'DMA_MEM_TO_DEV' undeclared (first use in this function); did you mean 'DMA_MEMORY_MAP'? config.direction = DMA_MEM_TO_DEV; ^~~~~~~~~~~~~~ DMA_MEMORY_MAP drivers/net/ethernet/ti/netcp_core.c:1349:21: note: each undeclared identifier is reported only once for each function it appears in drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_setup_navigator_resources': drivers/net/ethernet/ti/netcp_core.c:1659:22: error: 'DMA_DEV_TO_MEM' undeclared (first use in this function); did you mean 'DMA_DESC_HOST'? config.direction = DMA_DEV_TO_MEM; As the header is no longer included implicitly through netdevice.h, we should include it in the header that references the enum. Fixes: 0dd5759dbb1c ("net: remove dmaengine.h inclusion from netdevice.h") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/soc/ti/knav_dma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 2b7882666ef6..66693bc4c6ad 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -17,6 +17,8 @@ #ifndef __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ #define __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__ +#include + /* * PKTDMA descriptor manipulation macros for host packet descriptor */ -- cgit From 3a1214e8b06317b4e71cd3a36344df87b7858e19 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 1 Sep 2017 14:04:11 -0700 Subject: flow_dissector: Cleanup control flow __skb_flow_dissect is riddled with gotos that make discerning the flow, debugging, and extending the capability difficult. This patch reorganizes things so that we only perform goto's after the two main switch statements (no gotos within the cases now). It also eliminates several goto labels so that there are only two labels that can be target for goto. Reported-by: Alexander Popov Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e2663e900b0a..fc3dce730a6b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -19,6 +19,14 @@ struct flow_dissector_key_control { #define FLOW_DIS_FIRST_FRAG BIT(1) #define FLOW_DIS_ENCAPSULATION BIT(2) +enum flow_dissect_ret { + FLOW_DISSECT_RET_OUT_GOOD, + FLOW_DISSECT_RET_OUT_BAD, + FLOW_DISSECT_RET_PROTO_AGAIN, + FLOW_DISSECT_RET_IPPROTO_AGAIN, + FLOW_DISSECT_RET_CONTINUE, +}; + /** * struct flow_dissector_key_basic: * @thoff: Transport header offset -- cgit From 55199df6d2af55be414f40856efcb527811001bb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 3 Sep 2017 20:27:00 -0700 Subject: net: dsa: Allow switch drivers to indicate number of TX queues Let switch drivers indicate how many TX queues they support. Some switches, such as Broadcom Starfighter 2 are designed with 8 egress queues. Future changes will allow us to leverage the queue mapping and direct the transmission towards a particular queue. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 398ca8d70ccd..dd44d6ce1097 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -243,6 +243,9 @@ struct dsa_switch { /* devlink used to represent this switch device */ struct devlink *devlink; + /* Number of switch port queues */ + unsigned int num_tx_queues; + /* Dynamically allocated ports, keep last */ size_t num_ports; struct dsa_port ports[]; -- cgit From 0062818298662d0d05061949d12880146b5ebd65 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Aug 2017 15:06:14 -0400 Subject: rdma core: Add rdma_rw_mr_payload() The amount of payload per MR depends on device capabilities and the memory registration mode in use. The new rdma_rw API hides both, making it difficult for ULPs to determine how large their transport send queues need to be. Expose the MR payload information via a new API. Signed-off-by: Chuck Lever Acked-by: Doug Ledford Signed-off-by: J. Bruce Fields --- include/rdma/rw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 377d865e506d..a3cbbc7b6417 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -81,6 +81,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); +unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, + unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); void rdma_rw_cleanup_mrs(struct ib_qp *qp); -- cgit From 5482a978b962abd23f17a004e46d255d11646c20 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 4 Sep 2017 18:30:14 +0200 Subject: net: mdio-mux: add mdio_mux parameter to mdio_mux_init() mdio_mux_init() use the parameter dev for two distinct thing: 1) Have a device for all devm_ functions 2) Get device_node from it Since it is two distinct purpose, this patch add a parameter mdio_mux that is linked to task 2. This will also permit to register an of_node mdio-mux that lacks a direct owning device. For example a mdio-mux which is a subnode of a real device. Signed-off-by: Corentin Labbe Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio-mux.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mdio-mux.h b/include/linux/mdio-mux.h index 61f5b21b31c7..a5d58f221939 100644 --- a/include/linux/mdio-mux.h +++ b/include/linux/mdio-mux.h @@ -12,7 +12,16 @@ #include #include +/* mdio_mux_init() - Initialize a MDIO mux + * @dev The device owning the MDIO mux + * @mux_node The device node of the MDIO mux + * @switch_fn The function called for switching target MDIO child + * mux_handle A pointer to a (void *) used internaly by mdio-mux + * @data Private data used by switch_fn() + * @mux_bus An optional parent bus (Other case are to use parent_bus property) + */ int mdio_mux_init(struct device *dev, + struct device_node *mux_node, int (*switch_fn) (int cur, int desired, void *data), void **mux_handle, void *data, -- cgit From 03c6f7d64ac9c0a37cca91392ac4be8993a8f53d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Aug 2017 12:47:27 +1000 Subject: NFS: remove jiffies field from access cache This field hasn't been used since commit 57b691819ee2 ("NFS: Cache access checks more aggressively"). Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 238fdc4c46df..a0282ceaa48b 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -49,7 +49,6 @@ struct nfs_access_entry { struct rb_node rb_node; struct list_head lru; - unsigned long jiffies; struct rpc_cred * cred; __u32 mask; struct rcu_head rcu_head; -- cgit From 2ae409dc6a907e80f4cd32ad4482ef52441e3147 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Jul 2017 16:20:05 +0800 Subject: ceph: remove unused cap_release_safety mount option Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 8a79587e1317..dca30ac9bd34 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,7 +93,6 @@ struct ceph_options { #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ -#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) /* mount state */ enum { -- cgit From 4214fb158cc423ac31b841000e219855be055388 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 11 Jul 2017 18:49:44 +0800 Subject: ceph: validate correctness of some mount options Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index dca30ac9bd34..4c846aabd9f6 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -84,16 +84,6 @@ struct ceph_options { #define CEPH_AUTH_NAME_DEFAULT "guest" -/* - * Delay telling the MDS we no longer want caps, in case we reopen - * the file. Delay a minimum amount of time, even if we send a cap - * message for some other reason. Otherwise, take the oppotunity to - * update the mds to avoid sending another message later. - */ -#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ -#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ - - /* mount state */ enum { CEPH_MOUNT_MOUNTING, -- cgit From 3fb99d483e614bc3834784c7a686572c7970bb92 Mon Sep 17 00:00:00 2001 From: Yanhu Cao Date: Fri, 21 Jul 2017 17:20:10 +0800 Subject: ceph: nuke startsync op startsync is a no-op, has been for years. Remove it. Link: http://tracker.ceph.com/issues/20604 Signed-off-by: Yanhu Cao Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/rados.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index b8281feda9c7..01408841c9c4 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -230,7 +230,6 @@ extern const char *ceph_osd_state_name(int s); \ /* fancy write */ \ f(APPEND, __CEPH_OSD_OP(WR, DATA, 6), "append") \ - f(STARTSYNC, __CEPH_OSD_OP(WR, DATA, 7), "startsync") \ f(SETTRUNC, __CEPH_OSD_OP(WR, DATA, 8), "settrunc") \ f(TRIMTRUNC, __CEPH_OSD_OP(WR, DATA, 9), "trimtrunc") \ \ -- cgit From 95569713afc0b53ded1bba67834e0be24529a8c9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 24 Jul 2017 17:59:39 +0800 Subject: ceph: new cap message flags indicate if there is pending capsnap These flags tell mds if there is pending capsnap explicitly. Without this explicit notification, mds can only conclude if client has pending capsnap. The method mds use is inefficient and error-prone. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index edf5b04b918a..d1642a4b4c5e 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -669,7 +669,9 @@ enum { extern const char *ceph_cap_op_name(int op); /* flags field in client cap messages (version >= 10) */ -#define CEPH_CLIENT_CAPS_SYNC (0x1) +#define CEPH_CLIENT_CAPS_SYNC (1<<0) +#define CEPH_CLIENT_CAPS_NO_CAPSNAP (1<<1) +#define CEPH_CLIENT_CAPS_PENDING_CAPSNAP (1<<2); /* * caps message, used for capability callbacks, acks, requests, etc. -- cgit From 06d74376c8af32f5b8d777a943aa4dc99165088b Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Wed, 16 Aug 2017 10:19:27 -0400 Subject: ceph: more accurate statfs Improve accuracy of statfs reporting for Ceph filesystems comprising exactly one data pool. In this case, the Ceph monitor can now report the space usage for the single data pool instead of the global data for the entire Ceph cluster. Include support for this message in mon_client and leverage it in ceph/super. Signed-off-by: Douglas Fuller Reviewed-by: Yan, Zheng Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 ++ include/linux/ceph/mon_client.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index d1642a4b4c5e..b422170b791a 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -167,6 +167,8 @@ struct ceph_mon_request_header { struct ceph_mon_statfs { struct ceph_mon_request_header monhdr; struct ceph_fsid fsid; + __u8 contains_data_pool; + __le64 data_pool; } __attribute__ ((packed)); struct ceph_statfs { diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index d5a3ecea578d..0fa990bf867a 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -133,8 +133,8 @@ void ceph_monc_renew_subs(struct ceph_mon_client *monc); extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, unsigned long timeout); -extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, - struct ceph_statfs *buf); +int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool, + struct ceph_statfs *buf); int ceph_monc_get_version(struct ceph_mon_client *monc, const char *what, u64 *newest); -- cgit From b2770da6425406cf3f6d3fddbf9086b1db0106a1 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 6 Sep 2017 16:18:35 -0700 Subject: mm: add vm_insert_mixed_mkwrite() When servicing mmap() reads from file holes the current DAX code allocates a page cache page of all zeroes and places the struct page pointer in the mapping->page_tree radix tree. This has three major drawbacks: 1) It consumes memory unnecessarily. For every 4k page that is read via a DAX mmap() over a hole, we allocate a new page cache page. This means that if you read 1GiB worth of pages, you end up using 1GiB of zeroed memory. 2) It is slower than using a common zero page because each page fault has more work to do. Instead of just inserting a common zero page we have to allocate a page cache page, zero it, and then insert it. 3) The fact that we had to check for both DAX exceptional entries and for page cache pages in the radix tree made the DAX code more complex. This series solves these issues by following the lead of the DAX PMD code and using a common 4k zero page instead. This reduces memory usage and decreases latencies for some workloads, and it simplifies the DAX code, removing over 100 lines in total. This patch (of 5): To be able to use the common 4k zero page in DAX we need to have our PTE fault path look more like our PMD fault path where a PTE entry can be marked as dirty and writeable as it is first inserted rather than waiting for a follow-up dax_pfn_mkwrite() => finish_mkwrite_fault() call. Right now we can rely on having a dax_pfn_mkwrite() call because we can distinguish between these two cases in do_wp_page(): case 1: 4k zero page => writable DAX storage case 2: read-only DAX storage => writeable DAX storage This distinction is made by via vm_normal_page(). vm_normal_page() returns false for the common 4k zero page, though, just as it does for DAX ptes. Instead of special casing the DAX + 4k zero page case we will simplify our DAX PTE page fault sequence so that it matches our DAX PMD sequence, and get rid of the dax_pfn_mkwrite() helper. We will instead use dax_iomap_fault() to handle write-protection faults. This means that insert_pfn() needs to follow the lead of insert_pfn_pmd() and allow us to pass in a 'mkwrite' flag. If 'mkwrite' is set insert_pfn() will do the work that was previously done by wp_page_reuse() as part of the dax_pfn_mkwrite() call path. Link: http://lkml.kernel.org/r/20170724170616.25810-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Acked-by: Kirill A. Shutemov Cc: "Darrick J. Wong" Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c1f6c95f3496..eb5e4bc946cc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2294,6 +2294,8 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); +int vm_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); -- cgit From 91d25ba8a6b0d810dc844cebeedc53029118ce3e Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 6 Sep 2017 16:18:43 -0700 Subject: dax: use common 4k zero page for dax mmap reads When servicing mmap() reads from file holes the current DAX code allocates a page cache page of all zeroes and places the struct page pointer in the mapping->page_tree radix tree. This has three major drawbacks: 1) It consumes memory unnecessarily. For every 4k page that is read via a DAX mmap() over a hole, we allocate a new page cache page. This means that if you read 1GiB worth of pages, you end up using 1GiB of zeroed memory. This is easily visible by looking at the overall memory consumption of the system or by looking at /proc/[pid]/smaps: 7f62e72b3000-7f63272b3000 rw-s 00000000 103:00 12 /root/dax/data Size: 1048576 kB Rss: 1048576 kB Pss: 1048576 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 1048576 kB Private_Dirty: 0 kB Referenced: 1048576 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Locked: 0 kB 2) It is slower than using a common zero page because each page fault has more work to do. Instead of just inserting a common zero page we have to allocate a page cache page, zero it, and then insert it. Here are the average latencies of dax_load_hole() as measured by ftrace on a random test box: Old method, using zeroed page cache pages: 3.4 us New method, using the common 4k zero page: 0.8 us This was the average latency over 1 GiB of sequential reads done by this simple fio script: [global] size=1G filename=/root/dax/data fallocate=none [io] rw=read ioengine=mmap 3) The fact that we had to check for both DAX exceptional entries and for page cache pages in the radix tree made the DAX code more complex. Solve these issues by following the lead of the DAX PMD code and using a common 4k zero page instead. As with the PMD code we will now insert a DAX exceptional entry into the radix tree instead of a struct page pointer which allows us to remove all the special casing in the DAX code. Note that we do still pretty aggressively check for regular pages in the DAX radix tree, especially where we take action based on the bits set in the page. If we ever find a regular page in our radix tree now that most likely means that someone besides DAX is inserting pages (which has happened lots of times in the past), and we want to find that out early and fail loudly. This solution also removes the extra memory consumption. Here is that same /proc/[pid]/smaps after 1GiB of reading from a hole with the new code: 7f2054a74000-7f2094a74000 rw-s 00000000 103:00 12 /root/dax/data Size: 1048576 kB Rss: 0 kB Pss: 0 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 0 kB Referenced: 0 kB Anonymous: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Locked: 0 kB Overall system memory consumption is similarly improved. Another major change is that we remove dax_pfn_mkwrite() from our fault flow, and instead rely on the page fault itself to make the PTE dirty and writeable. The following description from the patch adding the vm_insert_mixed_mkwrite() call explains this a little more: "To be able to use the common 4k zero page in DAX we need to have our PTE fault path look more like our PMD fault path where a PTE entry can be marked as dirty and writeable as it is first inserted rather than waiting for a follow-up dax_pfn_mkwrite() => finish_mkwrite_fault() call. Right now we can rely on having a dax_pfn_mkwrite() call because we can distinguish between these two cases in do_wp_page(): case 1: 4k zero page => writable DAX storage case 2: read-only DAX storage => writeable DAX storage This distinction is made by via vm_normal_page(). vm_normal_page() returns false for the common 4k zero page, though, just as it does for DAX ptes. Instead of special casing the DAX + 4k zero page case we will simplify our DAX PTE page fault sequence so that it matches our DAX PMD sequence, and get rid of the dax_pfn_mkwrite() helper. We will instead use dax_iomap_fault() to handle write-protection faults. This means that insert_pfn() needs to follow the lead of insert_pfn_pmd() and allow us to pass in a 'mkwrite' flag. If 'mkwrite' is set insert_pfn() will do the work that was previously done by wp_page_reuse() as part of the dax_pfn_mkwrite() call path" Link: http://lkml.kernel.org/r/20170724170616.25810-4-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Steven Rostedt Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 12 +++++------- include/trace/events/fs_dax.h | 2 -- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index df97b7af7e2c..b3518559f0da 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -91,18 +91,17 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev); /* * We use lowest available bit in exceptional entry for locking, one bit for - * the entry size (PMD) and two more to tell us if the entry is a huge zero - * page (HZP) or an empty entry that is just used for locking. In total four - * special bits. + * the entry size (PMD) and two more to tell us if the entry is a zero page or + * an empty entry that is just used for locking. In total four special bits. * - * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and - * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem + * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE + * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem * block allocation. */ #define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) #define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) +#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) static inline unsigned long dax_radix_sector(void *entry) @@ -153,7 +152,6 @@ static inline unsigned int dax_radix_order(void *entry) return 0; } #endif -int dax_pfn_mkwrite(struct vm_fault *vmf); static inline bool dax_mapping(struct address_space *mapping) { diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h index 08bb3ed18dcc..fbc4a06f7310 100644 --- a/include/trace/events/fs_dax.h +++ b/include/trace/events/fs_dax.h @@ -190,8 +190,6 @@ DEFINE_EVENT(dax_pte_fault_class, name, \ DEFINE_PTE_FAULT_EVENT(dax_pte_fault); DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done); -DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite_no_entry); -DEFINE_PTE_FAULT_EVENT(dax_pfn_mkwrite); DEFINE_PTE_FAULT_EVENT(dax_load_hole); TRACE_EVENT(dax_insert_mapping, -- cgit From d01ad197ac3b50a99ea668697acefe12e73c5fea Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 6 Sep 2017 16:18:47 -0700 Subject: dax: remove DAX code from page_cache_tree_insert() Now that we no longer insert struct page pointers in DAX radix trees we can remove the special casing for DAX in page_cache_tree_insert(). This also allows us to make dax_wake_mapping_entry_waiter() local to fs/dax.c, removing it from dax.h. Link: http://lkml.kernel.org/r/20170724170616.25810-5-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Suggested-by: Jan Kara Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Steven Rostedt Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index b3518559f0da..319e0d997446 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -123,8 +123,6 @@ int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -void dax_wake_mapping_entry_waiter(struct address_space *mapping, - pgoff_t index, void *entry, bool wake_all); #ifdef CONFIG_FS_DAX int __dax_zero_page_range(struct block_device *bdev, -- cgit From 527b19d0808e75fbba896beb2435c2b4d6bcd32a Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 6 Sep 2017 16:18:51 -0700 Subject: dax: move all DAX radix tree defs to fs/dax.c Now that we no longer insert struct page pointers in DAX radix trees the page cache code no longer needs to know anything about DAX exceptional entries. Move all the DAX exceptional entry definitions from dax.h to fs/dax.c. Link: http://lkml.kernel.org/r/20170724170616.25810-6-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Suggested-by: Jan Kara Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andreas Dilger Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Steven Rostedt Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 319e0d997446..eb0bff6f1eab 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -89,33 +89,6 @@ void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); -/* - * We use lowest available bit in exceptional entry for locking, one bit for - * the entry size (PMD) and two more to tell us if the entry is a zero page or - * an empty entry that is just used for locking. In total four special bits. - * - * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE - * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem - * block allocation. - */ -#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) -#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) -#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) - -static inline unsigned long dax_radix_sector(void *entry) -{ - return (unsigned long)entry >> RADIX_DAX_SHIFT; -} - -static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) -{ - return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | - ((unsigned long)sector << RADIX_DAX_SHIFT) | - RADIX_DAX_ENTRY_LOCK); -} - ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, @@ -137,20 +110,6 @@ static inline int __dax_zero_page_range(struct block_device *bdev, } #endif -#ifdef CONFIG_FS_DAX_PMD -static inline unsigned int dax_radix_order(void *entry) -{ - if ((unsigned long)entry & RADIX_DAX_PMD) - return PMD_SHIFT - PAGE_SHIFT; - return 0; -} -#else -static inline unsigned int dax_radix_order(void *entry) -{ - return 0; -} -#endif - static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); -- cgit From 2482ddec670fb83717d129012bc558777cb159f7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 Sep 2017 16:19:18 -0700 Subject: mm: add SLUB free list pointer obfuscation This SLUB free list pointer obfuscation code is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. This adds a per-cache random value to SLUB caches that is XORed with their freelist pointer address and value. This adds nearly zero overhead and frustrates the very common heap overflow exploitation method of overwriting freelist pointers. A recent example of the attack is written up here: http://cyseclabs.com/blog/cve-2016-6187-heap-off-by-one-exploit and there is a section dedicated to the technique the book "A Guide to Kernel Exploitation: Attacking the Core". This is based on patches by Daniel Micay, and refactored to minimize the use of #ifdef. With 200-count cycles of "hackbench -g 20 -l 1000" I saw the following run times: before: mean 10.11882499999999999995 variance .03320378329145728642 stdev .18221905304181911048 after: mean 10.12654000000000000014 variance .04700556623115577889 stdev .21680767106160192064 The difference gets lost in the noise, but if the above is to be taken literally, using CONFIG_FREELIST_HARDENED is 0.07% slower. Link: http://lkml.kernel.org/r/20170802180609.GA66807@beast Signed-off-by: Kees Cook Suggested-by: Daniel Micay Cc: Rik van Riel Cc: Tycho Andersen Cc: Alexander Popov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index cc0faf3a90be..0783b622311e 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -115,6 +115,10 @@ struct kmem_cache { #endif #endif +#ifdef CONFIG_SLAB_FREELIST_HARDENED + unsigned long random; +#endif + #ifdef CONFIG_NUMA /* * Defragmentation by allocating from a remote node. -- cgit From d460acb5bdffc19b492b70b8f416c24dc03c474e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Sep 2017 16:19:26 -0700 Subject: mm: track actual nr_scanned during shrink_slab() Some shrinkers may only be able to free a bunch of objects at a time, and so free more than the requested nr_to_scan in one pass. Whilst other shrinkers may find themselves even unable to scan as many objects as they counted, and so underreport. Account for the extra freed/scanned objects against the total number of objects we intend to scan, otherwise we may end up penalising the slab far more than intended. Similarly, we want to add the underperforming scan to the deferred pass so that we try harder and harder in future passes. Link: http://lkml.kernel.org/r/20170822135325.9191-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson Cc: Michal Hocko Cc: Johannes Weiner Cc: Hillf Danton Cc: Minchan Kim Cc: Vlastimil Babka Cc: Mel Gorman Cc: Shaohua Li Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Joonas Lahtinen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shrinker.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 4fcacd915d45..51d189615bda 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -18,6 +18,13 @@ struct shrink_control { */ unsigned long nr_to_scan; + /* + * How many objects did scan_objects process? + * This defaults to nr_to_scan before every call, but the callee + * should track its actual progress. + */ + unsigned long nr_scanned; + /* current node being shrunk (for NUMA aware shrinkers) */ int nid; -- cgit From e5e68930263377c6d4f6da0ff06f36b55d83a83f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Sep 2017 16:19:37 -0700 Subject: mm, memory_hotplug: display allowed zones in the preferred ordering Prior to commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") we used to allow to change the valid zone types of a memory block if it is adjacent to a different zone type. This fact was reflected in memoryNN/valid_zones by the ordering of printed zones. The first one was default (echo online > memoryNN/state) and the other one could be onlined explicitly by online_{movable,kernel}. This behavior was removed by the said patch and as such the ordering was not all that important. In most cases a kernel zone would be default anyway. The only exception is movable_node handled by "mm, memory_hotplug: support movable_node for hotpluggable nodes". Let's reintroduce this behavior again because later patch will remove the zone overlap restriction and so user will be allowed to online kernel resp. movable block regardless of its placement. Original behavior will then become significant again because it would be non-trivial for users to see what is the default zone to online into. Implementation is really simple. Pull out zone selection out of move_pfn_range into zone_for_pfn_range helper and use it in show_valid_zones to display the zone for default onlining and then both kernel and movable if they are allowed. Default online zone is not duplicated. Link: http://lkml.kernel.org/r/20170714121233.16861-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Reza Arbab Cc: Yasuaki Ishimatsu Cc: Xishi Qiu Cc: Kani Toshimitsu Cc: Cc: Daniel Kiper Cc: Igor Mammedov Cc: Vitaly Kuznetsov Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c8a5056a5ae0..5e6e4cc36ff4 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -319,6 +319,6 @@ extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type); -extern struct zone *default_zone_for_pfn(int nid, unsigned long pfn, +extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn, unsigned long nr_pages); #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit From c9bff3eebc09be23fbc868f5e6731666d23cbea3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Sep 2017 16:20:13 -0700 Subject: mm, page_alloc: rip out ZONELIST_ORDER_ZONE Patch series "cleanup zonelists initialization", v1. This is aimed at cleaning up the zonelists initialization code we have but the primary motivation was bug report [2] which got resolved but the usage of stop_machine is just too ugly to live. Most patches are straightforward but 3 of them need a special consideration. Patch 1 removes zone ordered zonelists completely. I am CCing linux-api because this is a user visible change. As I argue in the patch description I do not think we have a strong usecase for it these days. I have kept sysctl in place and warn into the log if somebody tries to configure zone lists ordering. If somebody has a real usecase for it we can revert this patch but I do not expect anybody will actually notice runtime differences. This patch is not strictly needed for the rest but it made patch 6 easier to implement. Patch 7 removes stop_machine from build_all_zonelists without adding any special synchronization between iterators and updater which I _believe_ is acceptable as explained in the changelog. I hope I am not missing anything. Patch 8 then removes zonelists_mutex which is kind of ugly as well and not really needed AFAICS but a care should be taken when double checking my thinking. This patch (of 9): Supporting zone ordered zonelists costs us just a lot of code while the usefulness is arguable if existent at all. Mel has already made node ordering default on 64b systems. 32b systems are still using ZONELIST_ORDER_ZONE because it is considered better to fallback to a different NUMA node rather than consume precious lowmem zones. This argument is, however, weaken by the fact that the memory reclaim has been reworked to be node rather than zone oriented. This means that lowmem requests have to skip over all highmem pages on LRUs already and so zone ordering doesn't save the reclaim time much. So the only advantage of the zone ordering is under a light memory pressure when highmem requests do not ever hit into lowmem zones and the lowmem pressure doesn't need to reclaim. Considering that 32b NUMA systems are rather suboptimal already and it is generally advisable to use 64b kernel on such a HW I believe we should rather care about the code maintainability and just get rid of ZONELIST_ORDER_ZONE altogether. Keep systcl in place and warn if somebody tries to set zone ordering either from kernel command line or the sysctl. [mhocko@suse.com: reading vm.numa_zonelist_order will never terminate] Link: http://lkml.kernel.org/r/20170721143915.14161-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Shaohua Li Cc: Toshi Kani Cc: Abdul Haleem Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fc14b8b3f6ce..bfdc37b77d88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -896,7 +896,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, extern int numa_zonelist_order_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; -#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ +#define NUMA_ZONELIST_ORDER_LEN 16 #ifndef CONFIG_NEED_MULTIPLE_NODES -- cgit From 72675e131eb418c78980c1e683c0c25a25b61221 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Sep 2017 16:20:24 -0700 Subject: mm, memory_hotplug: drop zone from build_all_zonelists build_all_zonelists gets a zone parameter to initialize zone's pagesets. There is only a single user which gives a non-NULL zone parameter and that one doesn't really need the rest of the build_all_zonelists (see commit 6dcd73d7011b ("memory-hotplug: allocate zone's pcp before onlining pages")). Therefore remove setup_zone_pageset from build_all_zonelists and call it from its only user directly. This will also remove a pointless zonlists rebuilding which is always good. Link: http://lkml.kernel.org/r/20170721143915.14161-5-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Shaohua Li Cc: Toshi Kani Cc: Wen Congyang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bfdc37b77d88..551f68bec2fa 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -771,7 +771,7 @@ static inline bool is_dev_zone(const struct zone *zone) #include extern struct mutex zonelists_mutex; -void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); +void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags, -- cgit From b93e0f329e24f3615aa551fd9b99a75fb7c9195f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Sep 2017 16:20:37 -0700 Subject: mm, memory_hotplug: get rid of zonelists_mutex zonelists_mutex was introduced by commit 4eaf3f64397c ("mem-hotplug: fix potential race while building zonelist for new populated zone") to protect zonelist building from races. This is no longer needed though because both memory online and offline are fully serialized. New users have grown since then. Notably setup_per_zone_wmarks wants to prevent from races between memory hotplug, khugepaged setup and manual min_free_kbytes update via sysctl (see cfd3da1e49bb ("mm: Serialize access to min_free_kbytes"). Let's add a private lock for that purpose. This will not prevent from seeing halfway through memory hotplug operation but that shouldn't be a big deal becuse memory hotplug will update watermarks explicitly so we will eventually get a full picture. The lock just makes sure we won't race when updating watermarks leading to weird results. Also __build_all_zonelists manipulates global data so add a private lock for it as well. This doesn't seem to be necessary today but it is more robust to have a lock there. While we are at it make sure we document that memory online/offline depends on a full serialization either via mem_hotplug_begin() or device_lock. Link: http://lkml.kernel.org/r/20170721143915.14161-9-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Shaohua Li Cc: Toshi Kani Cc: Vlastimil Babka Cc: Haicheng Li Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 551f68bec2fa..e7e92c8f4883 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -770,7 +770,6 @@ static inline bool is_dev_zone(const struct zone *zone) #include -extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, -- cgit From 26b433d0da062d6e19d75350c0171d3cf8ff560d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:15 -0700 Subject: fscache: remove unused ->now_uncached callback Patch series "Ranged pagevec lookup", v2. In this series I make pagevec_lookup() update the index (to be consistent with pagevec_lookup_tag() and also as a preparation for ranged lookups), provide ranged variant of pagevec_lookup() and use it in places where it makes sense. This not only removes some common code but is also a measurable performance win for some use cases (see patch 4/10) where radix tree is sparse and searching & grabing of a page after the end of the range has measurable overhead. This patch (of 10): The callback doesn't ever get called. Remove it. Link: http://lkml.kernel.org/r/20170726114704.7626-2-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fscache.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 115bb81912cc..f4ff47d4a893 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -143,15 +143,6 @@ struct fscache_cookie_def { void (*mark_page_cached)(void *cookie_netfs_data, struct address_space *mapping, struct page *page); - - /* indicate the cookie is no longer cached - * - this function is called when the backing store currently caching - * a cookie is removed - * - the netfs should use this to clean up any markers indicating - * cached pages - * - this is mandatory for any object that may have data - */ - void (*now_uncached)(void *cookie_netfs_data); }; /* -- cgit From d72dc8a25afc71ce90ee92bdd77550e9beb85d4d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:18 -0700 Subject: mm: make pagevec_lookup() update index Make pagevec_lookup() (and underlying find_get_pages()) update index to the next page where iteration should continue. Most callers want this and also pagevec_lookup_tag() already does this. Link: http://lkml.kernel.org/r/20170726114704.7626-3-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 +- include/linux/pagevec.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 79b36f57c3ba..249b1b5964c7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -353,7 +353,7 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); -unsigned find_get_pages(struct address_space *mapping, pgoff_t start, +unsigned find_get_pages(struct address_space *mapping, pgoff_t *start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index b45d391b4540..c395a5bb58b2 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -28,7 +28,7 @@ unsigned pagevec_lookup_entries(struct pagevec *pvec, pgoff_t *indices); void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, - pgoff_t start, unsigned nr_pages); + pgoff_t *start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, int tag, unsigned nr_pages); -- cgit From b947cee4b96306037e166ff1ea5156c0ecdd7d91 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:21 -0700 Subject: mm: implement find_get_pages_range() Implement a variant of find_get_pages() that stops iterating at given index. This may be substantial performance gain if the mapping is sparse. See following commit for details. Furthermore lots of users of this function (through pagevec_lookup()) actually want a range lookup and all of them are currently open-coding this. Also create corresponding pagevec_lookup_range() function. Link: http://lkml.kernel.org/r/20170726114704.7626-4-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 12 ++++++++++-- include/linux/pagevec.h | 13 +++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 249b1b5964c7..5bbd6780f205 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -353,8 +353,16 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); -unsigned find_get_pages(struct address_space *mapping, pgoff_t *start, - unsigned int nr_pages, struct page **pages); +unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, + pgoff_t end, unsigned int nr_pages, + struct page **pages); +static inline unsigned find_get_pages(struct address_space *mapping, + pgoff_t *start, unsigned int nr_pages, + struct page **pages) +{ + return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages, + pages); +} unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index c395a5bb58b2..7df056910437 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -27,8 +27,17 @@ unsigned pagevec_lookup_entries(struct pagevec *pvec, pgoff_t start, unsigned nr_entries, pgoff_t *indices); void pagevec_remove_exceptionals(struct pagevec *pvec); -unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, - pgoff_t *start, unsigned nr_pages); +unsigned pagevec_lookup_range(struct pagevec *pvec, + struct address_space *mapping, + pgoff_t *start, pgoff_t end, unsigned nr_pages); +static inline unsigned pagevec_lookup(struct pagevec *pvec, + struct address_space *mapping, + pgoff_t *start, unsigned nr_pages) +{ + return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1, + nr_pages); +} + unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, int tag, unsigned nr_pages); -- cgit From 397162ffa2ed1cadffe05c324c6ddc53647f9c62 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Sep 2017 16:21:43 -0700 Subject: mm: remove nr_pages argument from pagevec_lookup{,_range}() All users of pagevec_lookup() and pagevec_lookup_range() now pass PAGEVEC_SIZE as a desired number of pages. Just drop the argument. Link: http://lkml.kernel.org/r/20170726114704.7626-11-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 7df056910437..4dcd5506f1ed 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -29,13 +29,12 @@ unsigned pagevec_lookup_entries(struct pagevec *pvec, void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup_range(struct pagevec *pvec, struct address_space *mapping, - pgoff_t *start, pgoff_t end, unsigned nr_pages); + pgoff_t *start, pgoff_t end); static inline unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, - pgoff_t *start, unsigned nr_pages) + pgoff_t *start) { - return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1, - nr_pages); + return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } unsigned pagevec_lookup_tag(struct pagevec *pvec, -- cgit From 04fecbf51b3cb79a628d50b65797c4866342b8d2 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 6 Sep 2017 16:22:09 -0700 Subject: mm: memcontrol: use int for event/state parameter in several functions Several functions use an enum type as parameter for an event/state, but are called in some locations with an argument of a different enum type. Adjust the interface of these functions to reality by changing the parameter to int. This fixes a ton of enum-conversion warnings that are generated when building the kernel with clang. [mka@chromium.org: also change parameter type of inc/dec/mod_memcg_page_state()] Link: http://lkml.kernel.org/r/20170728213442.93823-1-mka@chromium.org Link: http://lkml.kernel.org/r/20170727211004.34435-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Doug Anderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9b15a4bcfa77..69966c461d1c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -488,8 +488,9 @@ struct mem_cgroup *lock_page_memcg(struct page *page); void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { long val = 0; int cpu; @@ -503,15 +504,17 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, return val; } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) + int idx, int val) { if (!mem_cgroup_disabled()) __this_cpu_add(memcg->stat->count[idx], val); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) + int idx, int val) { if (!mem_cgroup_disabled()) this_cpu_add(memcg->stat->count[idx], val); @@ -535,14 +538,14 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, * Kernel pages are an exception to this, since they'll never move. */ static inline void __mod_memcg_page_state(struct page *page, - enum memcg_stat_item idx, int val) + int idx, int val) { if (page->mem_cgroup) __mod_memcg_state(page->mem_cgroup, idx, val); } static inline void mod_memcg_page_state(struct page *page, - enum memcg_stat_item idx, int val) + int idx, int val) { if (page->mem_cgroup) mod_memcg_state(page->mem_cgroup, idx, val); @@ -632,8 +635,9 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, this_cpu_add(memcg->stat->events[idx], count); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void count_memcg_page_event(struct page *page, - enum memcg_stat_item idx) + int idx) { if (page->mem_cgroup) count_memcg_events(page->mem_cgroup, idx, 1); @@ -846,31 +850,31 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) } static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { return 0; } static inline void __mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, + int idx, int nr) { } static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, + int idx, int nr) { } static inline void __mod_memcg_page_state(struct page *page, - enum memcg_stat_item idx, + int idx, int nr) { } static inline void mod_memcg_page_state(struct page *page, - enum memcg_stat_item idx, + int idx, int nr) { } @@ -924,7 +928,7 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, } static inline void count_memcg_page_event(struct page *page, - enum memcg_stat_item idx) + int idx) { } @@ -934,26 +938,30 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) } #endif /* CONFIG_MEMCG */ +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { __mod_memcg_state(memcg, idx, 1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { __mod_memcg_state(memcg, idx, -1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) + int idx) { __mod_memcg_page_state(page, idx, 1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) + int idx) { __mod_memcg_page_state(page, idx, -1); } @@ -982,26 +990,30 @@ static inline void __dec_lruvec_page_state(struct page *page, __mod_lruvec_page_state(page, idx, -1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void inc_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { mod_memcg_state(memcg, idx, 1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void dec_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx) + int idx) { mod_memcg_state(memcg, idx, -1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void inc_memcg_page_state(struct page *page, - enum memcg_stat_item idx) + int idx) { mod_memcg_page_state(page, idx, 1); } +/* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void dec_memcg_page_state(struct page *page, - enum memcg_stat_item idx) + int idx) { mod_memcg_page_state(page, idx, -1); } -- cgit From e07098294adfd03d582af7626752255e3d170393 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:22:16 -0700 Subject: mm, THP, swap: support to reclaim swap space for THP swapped out The normal swap slot reclaiming can be done when the swap count reaches SWAP_HAS_CACHE. But for the swap slot which is backing a THP, all swap slots backing one THP must be reclaimed together, because the swap slot may be used again when the THP is swapped out again later. So the swap slots backing one THP can be reclaimed together when the swap count for all swap slots for the THP reached SWAP_HAS_CACHE. In the patch, the functions to check whether the swap count for all swap slots backing one THP reached SWAP_HAS_CACHE are implemented and used when checking whether a swap slot can be reclaimed. To make it easier to determine whether a swap slot is backing a THP, a new swap cluster flag named CLUSTER_FLAG_HUGE is added to mark a swap cluster which is backing a THP (Transparent Huge Page). Because THP swap in as a whole isn't supported now. After deleting the THP from the swap cache (for example, swapping out finished), the CLUSTER_FLAG_HUGE flag will be cleared. So that, the normal pages inside THP can be swapped in individually. [ying.huang@intel.com: fix swap_page_trans_huge_swapped on HDD] Link: http://lkml.kernel.org/r/874ltsm0bi.fsf@yhuang-dev.intel.com Link: http://lkml.kernel.org/r/20170724051840.2309-3-ying.huang@intel.com Signed-off-by: "Huang, Ying" Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Minchan Kim Cc: Hugh Dickins Cc: Shaohua Li Cc: "Kirill A . Shutemov" Cc: Andrea Arcangeli Cc: Dan Williams Cc: Jens Axboe Cc: Michal Hocko Cc: Ross Zwisler [for brd.c, zram_drv.c, pmem.c] Cc: Vishal L Verma Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index d83d28e53e62..964b4f1fba4a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -188,6 +188,7 @@ struct swap_cluster_info { }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ +#define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from -- cgit From ba3c4ce6def4915093be80585ff69f780630f32f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:22:19 -0700 Subject: mm, THP, swap: make reuse_swap_page() works for THP swapped out After supporting to delay THP (Transparent Huge Page) splitting after swapped out, it is possible that some page table mappings of the THP are turned into swap entries. So reuse_swap_page() need to check the swap count in addition to the map count as before. This patch done that. In the huge PMD write protect fault handler, in addition to the page map count, the swap count need to be checked too, so the page lock need to be acquired too when calling reuse_swap_page() in addition to the page table lock. [ying.huang@intel.com: silence a compiler warning] Link: http://lkml.kernel.org/r/87bmnzizjy.fsf@yhuang-dev.intel.com Link: http://lkml.kernel.org/r/20170724051840.2309-4-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Hugh Dickins Cc: Shaohua Li Cc: Rik van Riel Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Dan Williams Cc: Jens Axboe Cc: Michal Hocko Cc: Ross Zwisler [for brd.c, zram_drv.c, pmem.c] Cc: Vishal L Verma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 964b4f1fba4a..7176ba780e83 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -510,8 +510,8 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -#define reuse_swap_page(page, total_mapcount) \ - (page_trans_huge_mapcount(page, total_mapcount) == 1) +#define reuse_swap_page(page, total_map_swapcount) \ + (page_trans_huge_mapcount(page, total_map_swapcount) == 1) static inline int try_to_free_swap(struct page *page) { -- cgit From 225311a46411c37e20e73d99f4382f141e12f6f9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:22:30 -0700 Subject: mm: test code to write THP to swap device as a whole To support delay splitting THP (Transparent Huge Page) after swapped out, we need to enhance swap writing code to support to write a THP as a whole. This will improve swap write IO performance. As Ming Lei pointed out, this should be based on multipage bvec support, which hasn't been merged yet. So this patch is only for testing the functionality of the other patches in the series. And will be reimplemented after multipage bvec support is merged. Link: http://lkml.kernel.org/r/20170724051840.2309-7-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: "Kirill A . Shutemov" Cc: Andrea Arcangeli Cc: Dan Williams Cc: Hugh Dickins Cc: Jens Axboe Cc: Johannes Weiner Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Ross Zwisler [for brd.c, zram_drv.c, pmem.c] Cc: Shaohua Li Cc: Vishal L Verma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bio.h | 8 ++++++++ include/linux/page-flags.h | 4 ++-- include/linux/vm_event_item.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b1cf4ba0902..1f0720de8990 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -38,7 +38,15 @@ #define BIO_BUG_ON #endif +#ifdef CONFIG_THP_SWAP +#if HPAGE_PMD_NR > 256 +#define BIO_MAX_PAGES HPAGE_PMD_NR +#else #define BIO_MAX_PAGES 256 +#endif +#else +#define BIO_MAX_PAGES 256 +#endif #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d33e3280c8ad..ba2d470d2d0a 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -303,8 +303,8 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) * Only test-and-set exist for PG_writeback. The unconditional operators are * risky: they bypass page accounting. */ -TESTPAGEFLAG(Writeback, writeback, PF_NO_COMPOUND) - TESTSCFLAG(Writeback, writeback, PF_NO_COMPOUND) +TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) + TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 37e8d31a4632..c75024e80eed 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -85,6 +85,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, + THP_SWPOUT, #endif #ifdef CONFIG_MEMORY_BALLOON BALLOON_INFLATE, -- cgit From 59807685a7e77e8c8fe5925613968841538d53d7 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:22:34 -0700 Subject: mm, THP, swap: support splitting THP for THP swap out After adding swapping out support for THP (Transparent Huge Page), it is possible that a THP in swap cache (partly swapped out) need to be split. To split such a THP, the swap cluster backing the THP need to be split too, that is, the CLUSTER_FLAG_HUGE flag need to be cleared for the swap cluster. The patch implemented this. And because the THP swap writing needs the THP keeps as huge page during writing. The PageWriteback flag is checked before splitting. Link: http://lkml.kernel.org/r/20170724051840.2309-8-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Hugh Dickins Cc: Shaohua Li Cc: Rik van Riel Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Dan Williams Cc: Jens Axboe Cc: Michal Hocko Cc: Ross Zwisler [for brd.c, zram_drv.c, pmem.c] Cc: Vishal L Verma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7176ba780e83..461cf107ad52 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -527,6 +527,15 @@ static inline swp_entry_t get_swap_page(struct page *page) #endif /* CONFIG_SWAP */ +#ifdef CONFIG_THP_SWAP +extern int split_swap_cluster(swp_entry_t entry); +#else +static inline int split_swap_cluster(swp_entry_t entry) +{ + return 0; +} +#endif + #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { -- cgit From fe490cc0fe9e6ee48cc48bb5dc463bc5f0f1428f Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:22:52 -0700 Subject: mm, THP, swap: add THP swapping out fallback counting When swapping out THP (Transparent Huge Page), instead of swapping out the THP as a whole, sometimes we have to fallback to split the THP into normal pages before swapping, because no free swap clusters are available, or cgroup limit is exceeded, etc. To count the number of the fallback, a new VM event THP_SWPOUT_FALLBACK is added, and counted when we fallback to split the THP. Link: http://lkml.kernel.org/r/20170724051840.2309-13-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Hugh Dickins Cc: Shaohua Li Cc: Rik van Riel Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Michal Hocko Cc: Dan Williams Cc: Jens Axboe Cc: Ross Zwisler [for brd.c, zram_drv.c, pmem.c] Cc: Vishal L Verma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index c75024e80eed..e02820fc2861 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -86,6 +86,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, THP_SWPOUT, + THP_SWPOUT_FALLBACK, #endif #ifdef CONFIG_MEMORY_BALLOON BALLOON_INFLATE, -- cgit From 8d10396342063c79e92c4e46215370ab7b988569 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 6 Sep 2017 16:23:02 -0700 Subject: userfaultfd: shmem: add shmem_mfill_zeropage_pte for userfaultfd support shmem_mfill_zeropage_pte is the low level routine that implements the userfaultfd UFFDIO_ZEROPAGE command. Since for shmem mappings zero pages are always allocated and accounted, the new method is a slight extension of the existing shmem_mcopy_atomic_pte. Link: http://lkml.kernel.org/r/1497939652-16528-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Hugh Dickins Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a7d6bd2a918f..b6c3540e07bc 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -137,9 +137,15 @@ extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); +extern int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr); #else #define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) +#define shmem_mfill_zeropage_pte(dst_mm, dst_pmd, dst_vma, \ + dst_addr) ({ BUG(); 0; }) #endif #endif -- cgit From a446d6f9ce545248c62ab9694d05ac6f68563706 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Sep 2017 16:23:22 -0700 Subject: include/linux/fs.h: remove unneeded forward definition of mm_struct Link: http://lkml.kernel.org/r/20170525102927.6163-1-jlayton@redhat.com Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cc2e0f5a8fd1..baea880c5c03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1270,8 +1270,6 @@ extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); -struct mm_struct; - /* * Umount options */ -- cgit From e652f694598273c5d749687032d1534a30e6a3a5 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:25 -0700 Subject: mm: hugetlb: define system call hugetlb size encodings in single file Patch series "Consolidate system call hugetlb page size encodings". These patches are the result of discussions in https://lkml.org/lkml/2017/3/8/548. The following changes are made in the patch set: 1) Put all the log2 encoded huge page size definitions in a common header file. The idea is have a set of definitions that can be use as the basis for system call specific definitions such as MAP_HUGE_* and SHM_HUGE_*. 2) Remove MAP_HUGE_* definitions in arch specific files. All these definitions are the same. Consolidate all definitions in the primary user header file (uapi/linux/mman.h). 3) Remove SHM_HUGE_* definitions intended for user space from kernel header file, and add to user (uapi/linux/shm.h) header file. Add definitions for all known huge page size encodings as in mmap. This patch (of 3): If hugetlb pages are requested in mmap or shmget system calls, a huge page size other than default can be requested. This is accomplished by encoding the log2 of the huge page size in the upper bits of the flag argument. asm-generic and arch specific headers all define the same values for these encodings. Put common definitions in a single header file. The primary uapi header files for mmap and shm will use these definitions as a basis for definitions specific to those system calls. Link: http://lkml.kernel.org/r/1501527386-10736-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Andi Kleen Cc: Michael Kerrisk Cc: Davidlohr Bueso Cc: Anshuman Khandual Cc: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/hugetlb_encode.h | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/uapi/asm-generic/hugetlb_encode.h (limited to 'include') diff --git a/include/uapi/asm-generic/hugetlb_encode.h b/include/uapi/asm-generic/hugetlb_encode.h new file mode 100644 index 000000000000..e4732d3c2998 --- /dev/null +++ b/include/uapi/asm-generic/hugetlb_encode.h @@ -0,0 +1,34 @@ +#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ +#define _ASM_GENERIC_HUGETLB_ENCODE_H_ + +/* + * Several system calls take a flag to request "hugetlb" huge pages. + * Without further specification, these system calls will use the + * system's default huge page size. If a system supports multiple + * huge page sizes, the desired huge page size can be specified in + * bits [26:31] of the flag arguments. The value in these 6 bits + * will encode the log2 of the huge page size. + * + * The following definitions are associated with this huge page size + * encoding in flag arguments. System call specific header files + * that use this encoding should include this file. They can then + * provide definitions based on these with their own specific prefix. + * for example: + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT + */ + +#define HUGETLB_FLAG_ENCODE_SHIFT 26 +#define HUGETLB_FLAG_ENCODE_MASK 0x3f + +#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) + +#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ -- cgit From aafd4562dfee81a40ba21b5ea3cf5e06664bc7f6 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:29 -0700 Subject: mm: arch: consolidate mmap hugetlb size encodings A non-default huge page size can be encoded in the flags argument of the mmap system call. The definitions for these encodings are in arch specific header files. However, all architectures use the same values. Consolidate all the definitions in the primary user header file (uapi/linux/mman.h). Include definitions for all known huge page sizes. Use the generic encoding definitions in hugetlb_encode.h as the basis for these definitions. Link: http://lkml.kernel.org/r/1501527386-10736-3-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Matthew Wilcox Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/mman-common.h | 11 ----------- include/uapi/linux/mman.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 8c27db0c5c08..d248f3c335b5 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -61,17 +61,6 @@ /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index ade4acd3a90c..a937480d7cd3 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -2,6 +2,7 @@ #define _UAPI_LINUX_MMAN_H #include +#include #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -10,4 +11,25 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + #endif /* _UAPI_LINUX_MMAN_H */ -- cgit From 4da243ac1cf6aeb30b7c555d56208982d66d6d33 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:33 -0700 Subject: mm: shm: use new hugetlb size encoding definitions Use the common definitions from hugetlb_encode.h header file for encoding hugetlb size definitions in shmget system call flags. In addition, move these definitions from the internal (kernel) to user (uapi) header file. Link: http://lkml.kernel.org/r/1501527386-10736-4-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Matthew Wilcox Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 17 ----------------- include/uapi/linux/shm.h | 31 +++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/shm.h b/include/linux/shm.h index 0fb7061ec54c..21a5e6c43385 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -27,23 +27,6 @@ struct shmid_kernel /* private to the kernel */ /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ -#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ -#define SHM_NORESERVE 010000 /* don't check for reservations */ - -/* Bits [26:31] are reserved */ - -/* - * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define SHM_HUGE_SHIFT 26 -#define SHM_HUGE_MASK 0x3f -#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) -#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) #ifdef CONFIG_SYSVIPC struct sysv_shm { diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 1fbf24ea37fd..cf23c873719d 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -3,6 +3,7 @@ #include #include +#include #ifndef __KERNEL__ #include #endif @@ -40,11 +41,37 @@ struct shmid_ds { /* Include the definition of shmid64_ds and shminfo64 */ #include -/* permission flag for shmget */ +/* + * shmget() shmflg values. + */ +/* The bottom nine bits are the same as open(2) mode flags */ #define SHM_R 0400 /* or S_IRUGO from */ #define SHM_W 0200 /* or S_IWUGO from */ +/* Bits 9 & 10 are IPC_CREAT and IPC_EXCL */ +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_NORESERVE 010000 /* don't check for reservations */ + +/* + * Huge page size encoding when SHM_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h + */ +#define SHM_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define SHM_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define SHM_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define SHM_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define SHM_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB -/* mode for attach */ +/* + * shmat() shmflg values + */ #define SHM_RDONLY 010000 /* read-only access */ #define SHM_RND 020000 /* round attach address to SHMLBA boundary */ #define SHM_REMAP 040000 /* take-over region on attach */ -- cgit From c41f012ade0b95b0a6e25c7150673e0554736165 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Sep 2017 16:23:36 -0700 Subject: mm: rename global_page_state to global_zone_page_state global_page_state is error prone as a recent bug report pointed out [1]. It only returns proper values for zone based counters as the enum it gets suggests. We already have global_node_page_state so let's rename global_page_state to global_zone_page_state to be more explicit here. All existing users seems to be correct: $ git grep "global_page_state(NR_" | sed 's@.*(\(NR_[A-Z_]*\)).*@\1@' | sort | uniq -c 2 NR_BOUNCE 2 NR_FREE_CMA_PAGES 11 NR_FREE_PAGES 1 NR_KERNEL_STACK_KB 1 NR_MLOCK 2 NR_PAGETABLE This patch shouldn't introduce any functional change. [1] http://lkml.kernel.org/r/201707260628.v6Q6SmaS030814@www262.sakura.ne.jp Link: http://lkml.kernel.org/r/20170801134256.5400-2-hannes@cmpxchg.org Signed-off-by: Michal Hocko Signed-off-by: Johannes Weiner Cc: Tetsuo Handa Cc: Josef Bacik Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++-- include/linux/vmstat.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 461cf107ad52..76f1632eea5a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -263,8 +263,8 @@ extern unsigned long totalreserve_pages; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); -/* Definition of global_page_state not available yet */ -#define nr_free_pages() global_page_state(NR_FREE_PAGES) +/* Definition of global_zone_page_state not available yet */ +#define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) /* linux/mm/swap.c */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b3d85f30d424..97e11ab573f0 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -123,7 +123,7 @@ static inline void node_page_state_add(long x, struct pglist_data *pgdat, atomic_long_add(x, &vm_node_stat[item]); } -static inline unsigned long global_page_state(enum zone_stat_item item) +static inline unsigned long global_zone_page_state(enum zone_stat_item item) { long x = atomic_long_read(&vm_zone_stat[item]); #ifdef CONFIG_SMP @@ -199,7 +199,7 @@ extern unsigned long sum_zone_node_page_state(int node, extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); #else -#define sum_zone_node_page_state(node, item) global_page_state(item) +#define sum_zone_node_page_state(node, item) global_zone_page_state(item) #define node_page_state(node, item) global_node_page_state(item) #endif /* CONFIG_NUMA */ -- cgit From 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Wed, 6 Sep 2017 16:23:39 -0700 Subject: mm: userfaultfd: add feature to request for a signal delivery In some cases, userfaultfd mechanism should just deliver a SIGBUS signal to the faulting process, instead of the page-fault event. Dealing with page-fault event using a monitor thread can be an overhead in these cases. For example applications like the database could use the signaling mechanism for robustness purpose. Database uses hugetlbfs for performance reason. Files on hugetlbfs filesystem are created and huge pages allocated using fallocate() API. Pages are deallocated/freed using fallocate() hole punching support. These files are mmapped and accessed by many processes as shared memory. The database keeps track of which offsets in the hugetlbfs file have pages allocated. Any access to mapped address over holes in the file, which can occur due to bugs in the application, is considered invalid and expect the process to simply receive a SIGBUS. However, currently when a hole in the file is accessed via the mapped address, kernel/mm attempts to automatically allocate a page at page fault time, resulting in implicitly filling the hole in the file. This may not be the desired behavior for applications like the database that want to explicitly manage page allocations of hugetlbfs files. Using userfaultfd mechanism with this support to get a signal, database application can prevent pages from being allocated implicitly when processes access mapped address over holes in the file. This patch adds UFFD_FEATURE_SIGBUS feature to userfaultfd mechnism to request for a SIGBUS signal. See following for previous discussion about the database requirement leading to this proposal as suggested by Andrea. http://www.spinics.net/lists/linux-mm/msg129224.html Link: http://lkml.kernel.org/r/1501552446-748335-2-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Reviewed-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 3b059530dac9..d39d5db56771 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -23,7 +23,8 @@ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM) + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -153,6 +154,12 @@ struct uffdio_api { * UFFD_FEATURE_MISSING_SHMEM works the same as * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem * (i.e. tmpfs and other shmem based APIs). + * + * UFFD_FEATURE_SIGBUS feature means no page-fault + * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead + * a SIGBUS signal will be sent to the faulting process. + * The application process can enable this behavior by adding + * it to uffdio_api.features. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -161,6 +168,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS (1<<7) __u64 features; __u64 ioctls; -- cgit From 9d4ac934829ac58c5109c49e6dfe677300e5e652 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Wed, 6 Sep 2017 16:23:56 -0700 Subject: userfaultfd: provide pid in userfault msg It could be useful for calculating downtime during postcopy live migration per vCPU. Side observer or application itself will be informed about proper task's sleep during userfaultfd processing. Process's thread id is being provided when user requeste it by setting UFFD_FEATURE_THREAD_ID bit into uffdio_api.features. Link: http://lkml.kernel.org/r/20170802165145.22628-6-aarcange@redhat.com Signed-off-by: Alexey Perevalov Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index d39d5db56771..2b24c28d99a7 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -24,7 +24,8 @@ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ - UFFD_FEATURE_SIGBUS) + UFFD_FEATURE_SIGBUS | \ + UFFD_FEATURE_THREAD_ID) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -79,6 +80,7 @@ struct uffd_msg { struct { __u64 flags; __u64 address; + __u32 ptid; } pagefault; struct { @@ -158,8 +160,9 @@ struct uffdio_api { * UFFD_FEATURE_SIGBUS feature means no page-fault * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead * a SIGBUS signal will be sent to the faulting process. - * The application process can enable this behavior by adding - * it to uffdio_api.features. + * + * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will + * be returned, if feature is not requested 0 will be returned. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -169,6 +172,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) __u64 features; __u64 ioctls; -- cgit From a36985d31a65d5c0559fb582719e32eaf0ccec3b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 6 Sep 2017 16:23:59 -0700 Subject: userfaultfd: provide pid in userfault msg - add feat union No ABI change, but this will make it more explicit to software that ptid is only available if requested by passing UFFD_FEATURE_THREAD_ID to UFFDIO_API. The fact it's a union will also self document it shouldn't be taken for granted there's a tpid there. Link: http://lkml.kernel.org/r/20170802165145.22628-7-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Alexey Perevalov Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 2b24c28d99a7..d6d1f65cb3c3 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,7 +80,9 @@ struct uffd_msg { struct { __u64 flags; __u64 address; - __u32 ptid; + union { + __u32 ptid; + } feat; } pagefault; struct { -- cgit From 749df87bd7bee5a79cef073f5d032ddb2b211de8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:24:16 -0700 Subject: mm/shmem: add hugetlbfs support to memfd_create() This patch came out of discussions in this e-mail thread: http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com The Oracle JVM team is developing a new garbage collection model. This new model requires multiple mappings of the same anonymous memory. One straight forward way to accomplish this is with memfd_create. They can use the returned fd to create multiple mappings of the same memory. The JVM today has an option to use (static hugetlb) huge pages. If this option is specified, they would like to use the same garbage collection model requiring multiple mappings to the same memory. Using hugetlbfs, it is possible to explicitly mount a filesystem and specify file paths in order to get an fd that can be used for multiple mappings. However, this introduces additional system admin work and coordination. Ideally they would like to get a hugetlbfs fd without requiring explicit mounting of a filesystem. Today, mmap and shmget can make use of hugetlbfs without explicitly mounting a filesystem. The patch adds this functionality to memfd_create. Add a new flag MFD_HUGETLB to memfd_create() that will specify the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. As with other system calls that request hugetlbfs backed pages, there is the ability to encode huge page size in the flag arguments. hugetlbfs does not support sealing operations, therefore specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL. Of course, the memfd_man page would need updating if this type of functionality moves forward. Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/memfd.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 534e364bda92..7f3a722dbd72 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -1,8 +1,32 @@ #ifndef _UAPI_LINUX_MEMFD_H #define _UAPI_LINUX_MEMFD_H +#include + /* flags for memfd_create(2) (unsigned int) */ #define MFD_CLOEXEC 0x0001U #define MFD_ALLOW_SEALING 0x0002U +#define MFD_HUGETLB 0x0004U + +/* + * Huge page size encoding when MFD_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #endif /* _UAPI_LINUX_MEMFD_H */ -- cgit From cbc65df240c104bf540af1ad58595bf1eaa5ee10 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:24:29 -0700 Subject: mm, swap: add swap readahead hit statistics Patch series "mm, swap: VMA based swap readahead", v4. The swap readahead is an important mechanism to reduce the swap in latency. Although pure sequential memory access pattern isn't very popular for anonymous memory, the space locality is still considered valid. In the original swap readahead implementation, the consecutive blocks in swap device are readahead based on the global space locality estimation. But the consecutive blocks in swap device just reflect the order of page reclaiming, don't necessarily reflect the access pattern in virtual memory space. And the different tasks in the system may have different access patterns, which makes the global space locality estimation incorrect. In this patchset, when page fault occurs, the virtual pages near the fault address will be readahead instead of the swap slots near the fault swap slot in swap device. This avoid to readahead the unrelated swap slots. At the same time, the swap readahead is changed to work on per-VMA from globally. So that the different access patterns of the different VMAs could be distinguished, and the different readahead policy could be applied accordingly. The original core readahead detection and scaling algorithm is reused, because it is an effect algorithm to detect the space locality. In addition to the swap readahead changes, some new sysfs interface is added to show the efficiency of the readahead algorithm and some other swap statistics. This new implementation will incur more small random read, on SSD, the improved correctness of estimation and readahead target should beat the potential increased overhead, this is also illustrated in the test results below. But on HDD, the overhead may beat the benefit, so the original implementation will be used by default. The test and result is as follow, Common test condition ===================== Test Machine: Xeon E5 v3 (2 sockets, 72 threads, 32G RAM) Swap device: NVMe disk Micro-benchmark with combined access pattern ============================================ vm-scalability, sequential swap test case, 4 processes to eat 50G virtual memory space, repeat the sequential memory writing until 300 seconds. The first round writing will trigger swap out, the following rounds will trigger sequential swap in and out. At the same time, run vm-scalability random swap test case in background, 8 processes to eat 30G virtual memory space, repeat the random memory write until 300 seconds. This will trigger random swap-in in the background. This is a combined workload with sequential and random memory accessing at the same time. The result (for sequential workload) is as follow, Base Optimized ---- --------- throughput 345413 KB/s 414029 KB/s (+19.9%) latency.average 97.14 us 61.06 us (-37.1%) latency.50th 2 us 1 us latency.60th 2 us 1 us latency.70th 98 us 2 us latency.80th 160 us 2 us latency.90th 260 us 217 us latency.95th 346 us 369 us latency.99th 1.34 ms 1.09 ms ra_hit% 52.69% 99.98% The original swap readahead algorithm is confused by the background random access workload, so readahead hit rate is lower. The VMA-base readahead algorithm works much better. Linpack ======= The test memory size is bigger than RAM to trigger swapping. Base Optimized ---- --------- elapsed_time 393.49 s 329.88 s (-16.2%) ra_hit% 86.21% 98.82% The score of base and optimized kernel hasn't visible changes. But the elapsed time reduced and readahead hit rate improved, so the optimized kernel runs better for startup and tear down stages. And the absolute value of readahead hit rate is high, shows that the space locality is still valid in some practical workloads. This patch (of 5): The statistics for total readahead pages and total readahead hits are recorded and exported via the following sysfs interface. /sys/kernel/mm/swap/ra_hits /sys/kernel/mm/swap/ra_total With them, the efficiency of the swap readahead could be measured, so that the swap readahead algorithm and parameters could be tuned accordingly. [akpm@linux-foundation.org: don't display swap stats if CONFIG_SWAP=n] Link: http://lkml.kernel.org/r/20170807054038.1843-2-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Hugh Dickins Cc: Fengguang Wu Cc: Tim Chen Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index e02820fc2861..d77bc35278b0 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -105,6 +105,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, VMACACHE_FIND_CALLS, VMACACHE_FIND_HITS, VMACACHE_FULL_FLUSHES, +#endif +#ifdef CONFIG_SWAP + SWAP_RA, + SWAP_RA_HIT, #endif NR_VM_EVENT_ITEMS }; -- cgit From ec560175c0b6fce86994bdf036754d48122c5c87 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:24:36 -0700 Subject: mm, swap: VMA based swap readahead The swap readahead is an important mechanism to reduce the swap in latency. Although pure sequential memory access pattern isn't very popular for anonymous memory, the space locality is still considered valid. In the original swap readahead implementation, the consecutive blocks in swap device are readahead based on the global space locality estimation. But the consecutive blocks in swap device just reflect the order of page reclaiming, don't necessarily reflect the access pattern in virtual memory. And the different tasks in the system may have different access patterns, which makes the global space locality estimation incorrect. In this patch, when page fault occurs, the virtual pages near the fault address will be readahead instead of the swap slots near the fault swap slot in swap device. This avoid to readahead the unrelated swap slots. At the same time, the swap readahead is changed to work on per-VMA from globally. So that the different access patterns of the different VMAs could be distinguished, and the different readahead policy could be applied accordingly. The original core readahead detection and scaling algorithm is reused, because it is an effect algorithm to detect the space locality. The test and result is as follow, Common test condition ===================== Test Machine: Xeon E5 v3 (2 sockets, 72 threads, 32G RAM) Swap device: NVMe disk Micro-benchmark with combined access pattern ============================================ vm-scalability, sequential swap test case, 4 processes to eat 50G virtual memory space, repeat the sequential memory writing until 300 seconds. The first round writing will trigger swap out, the following rounds will trigger sequential swap in and out. At the same time, run vm-scalability random swap test case in background, 8 processes to eat 30G virtual memory space, repeat the random memory write until 300 seconds. This will trigger random swap-in in the background. This is a combined workload with sequential and random memory accessing at the same time. The result (for sequential workload) is as follow, Base Optimized ---- --------- throughput 345413 KB/s 414029 KB/s (+19.9%) latency.average 97.14 us 61.06 us (-37.1%) latency.50th 2 us 1 us latency.60th 2 us 1 us latency.70th 98 us 2 us latency.80th 160 us 2 us latency.90th 260 us 217 us latency.95th 346 us 369 us latency.99th 1.34 ms 1.09 ms ra_hit% 52.69% 99.98% The original swap readahead algorithm is confused by the background random access workload, so readahead hit rate is lower. The VMA-base readahead algorithm works much better. Linpack ======= The test memory size is bigger than RAM to trigger swapping. Base Optimized ---- --------- elapsed_time 393.49 s 329.88 s (-16.2%) ra_hit% 86.21% 98.82% The score of base and optimized kernel hasn't visible changes. But the elapsed time reduced and readahead hit rate improved, so the optimized kernel runs better for startup and tear down stages. And the absolute value of readahead hit rate is high, shows that the space locality is still valid in some practical workloads. Link: http://lkml.kernel.org/r/20170807054038.1843-4-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Hugh Dickins Cc: Fengguang Wu Cc: Tim Chen Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + include/linux/swap.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 57378c7cb5f8..f45ad815b7d7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -335,6 +335,7 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ + atomic_long_t swap_readahead_info; #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 76f1632eea5a..61d63379e956 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -251,6 +251,25 @@ struct swap_info_struct { struct swap_cluster_list discard_clusters; /* discard clusters list */ }; +#ifdef CONFIG_64BIT +#define SWAP_RA_ORDER_CEILING 5 +#else +/* Avoid stack overflow, because we need to save part of page table */ +#define SWAP_RA_ORDER_CEILING 3 +#define SWAP_RA_PTE_CACHE_SIZE (1 << SWAP_RA_ORDER_CEILING) +#endif + +struct vma_swap_readahead { + unsigned short win; + unsigned short offset; + unsigned short nr_pte; +#ifdef CONFIG_64BIT + pte_t *ptes; +#else + pte_t ptes[SWAP_RA_PTE_CACHE_SIZE]; +#endif +}; + /* linux/mm/workingset.c */ void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); @@ -350,6 +369,7 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, #define SWAP_ADDRESS_SPACE_SHIFT 14 #define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) extern struct address_space *swapper_spaces[]; +extern bool swap_vma_readahead; #define swap_address_space(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ >> SWAP_ADDRESS_SPACE_SHIFT]) @@ -362,7 +382,9 @@ extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); -extern struct page *lookup_swap_cache(swp_entry_t); +extern struct page *lookup_swap_cache(swp_entry_t entry, + struct vm_area_struct *vma, + unsigned long addr); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool do_poll); @@ -372,6 +394,17 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, extern struct page *swapin_readahead(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr); +extern struct page *swap_readahead_detect(struct vm_fault *vmf, + struct vma_swap_readahead *swap_ra); +extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, + struct vm_fault *vmf, + struct vma_swap_readahead *swap_ra); + +static inline bool swap_use_vma_readahead(void) +{ + return READ_ONCE(swap_vma_readahead); +} + /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -466,12 +499,32 @@ static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, return NULL; } +static inline bool swap_use_vma_readahead(void) +{ + return false; +} + +static inline struct page *swap_readahead_detect( + struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) +{ + return NULL; +} + +static inline struct page *do_swap_page_readahead( + swp_entry_t fentry, gfp_t gfp_mask, + struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) +{ + return NULL; +} + static inline int swap_writepage(struct page *p, struct writeback_control *wbc) { return 0; } -static inline struct page *lookup_swap_cache(swp_entry_t swp) +static inline struct page *lookup_swap_cache(swp_entry_t swp, + struct vm_area_struct *vma, + unsigned long addr) { return NULL; } -- cgit From 81a0298bdfab0203d360df7c9bf690d1d457f999 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:24:43 -0700 Subject: mm, swap: don't use VMA based swap readahead if HDD is used as swap VMA based swap readahead will readahead the virtual pages that is continuous in the virtual address space. While the original swap readahead will readahead the swap slots that is continuous in the swap device. Although VMA based swap readahead is more correct for the swap slots to be readahead, it will trigger more small random readings, which may cause the performance of HDD (hard disk) to degrade heavily, and may finally exceed the benefit. To avoid the issue, in this patch, if the HDD is used as swap, the VMA based swap readahead will be disabled, and the original swap readahead will be used instead. Link: http://lkml.kernel.org/r/20170807054038.1843-6-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Johannes Weiner Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Hugh Dickins Cc: Fengguang Wu Cc: Tim Chen Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 61d63379e956..9c4ae6f14eea 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -400,16 +400,17 @@ extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, struct vm_fault *vmf, struct vma_swap_readahead *swap_ra); -static inline bool swap_use_vma_readahead(void) -{ - return READ_ONCE(swap_vma_readahead); -} - /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; +extern atomic_t nr_rotate_swap; extern bool has_usable_swap(void); +static inline bool swap_use_vma_readahead(void) +{ + return READ_ONCE(swap_vma_readahead) && !atomic_read(&nr_rotate_swap); +} + /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) { -- cgit From a2468cc9bfdff6139f59ca896671e5819ff5f94a Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Wed, 6 Sep 2017 16:24:57 -0700 Subject: swap: choose swap device according to numa node If the system has more than one swap device and swap device has the node information, we can make use of this information to decide which swap device to use in get_swap_pages() to get better performance. The current code uses a priority based list, swap_avail_list, to decide which swap device to use and if multiple swap devices share the same priority, they are used round robin. This patch changes the previous single global swap_avail_list into a per-numa-node list, i.e. for each numa node, it sees its own priority based list of available swap devices. Swap device's priority can be promoted on its matching node's swap_avail_list. The current swap device's priority is set as: user can set a >=0 value, or the system will pick one starting from -1 then downwards. The priority value in the swap_avail_list is the negated value of the swap device's due to plist being sorted from low to high. The new policy doesn't change the semantics for priority >=0 cases, the previous starting from -1 then downwards now becomes starting from -2 then downwards and -1 is reserved as the promoted value. Take 4-node EX machine as an example, suppose 4 swap devices are available, each sit on a different node: swapA on node 0 swapB on node 1 swapC on node 2 swapD on node 3 After they are all swapped on in the sequence of ABCD. Current behaviour: their priorities will be: swapA: -1 swapB: -2 swapC: -3 swapD: -4 And their position in the global swap_avail_list will be: swapA -> swapB -> swapC -> swapD prio:1 prio:2 prio:3 prio:4 New behaviour: their priorities will be(note that -1 is skipped): swapA: -2 swapB: -3 swapC: -4 swapD: -5 And their positions in the 4 swap_avail_lists[nid] will be: swap_avail_lists[0]: /* node 0's available swap device list */ swapA -> swapB -> swapC -> swapD prio:1 prio:3 prio:4 prio:5 swap_avali_lists[1]: /* node 1's available swap device list */ swapB -> swapA -> swapC -> swapD prio:1 prio:2 prio:4 prio:5 swap_avail_lists[2]: /* node 2's available swap device list */ swapC -> swapA -> swapB -> swapD prio:1 prio:2 prio:3 prio:5 swap_avail_lists[3]: /* node 3's available swap device list */ swapD -> swapA -> swapB -> swapC prio:1 prio:2 prio:3 prio:4 To see the effect of the patch, a test that starts N process, each mmap a region of anonymous memory and then continually write to it at random position to trigger both swap in and out is used. On a 2 node Skylake EP machine with 64GiB memory, two 170GB SSD drives are used as swap devices with each attached to a different node, the result is: runtime=30m/processes=32/total test size=128G/each process mmap region=4G kernel throughput vanilla 13306 auto-binding 15169 +14% runtime=30m/processes=64/total test size=128G/each process mmap region=2G kernel throughput vanilla 11885 auto-binding 14879 +25% [aaron.lu@intel.com: v2] Link: http://lkml.kernel.org/r/20170814053130.GD2369@aaronlu.sh.intel.com Link: http://lkml.kernel.org/r/20170816024439.GA10925@aaronlu.sh.intel.com [akpm@linux-foundation.org: use kmalloc_array()] Link: http://lkml.kernel.org/r/20170814053130.GD2369@aaronlu.sh.intel.com Link: http://lkml.kernel.org/r/20170816024439.GA10925@aaronlu.sh.intel.com Signed-off-by: Aaron Lu Cc: "Chen, Tim C" Cc: Huang Ying Cc: Andi Kleen Cc: Michal Hocko Cc: Minchan Kim Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 9c4ae6f14eea..8bf3487fb204 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -212,7 +212,7 @@ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ - struct plist_node avail_list; /* entry in swap_avail_head */ + struct plist_node avail_lists[MAX_NUMNODES];/* entry in swap_avail_heads */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ -- cgit From 212925802454672e6cd2949a727f5e2c1377bf06 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 6 Sep 2017 16:25:00 -0700 Subject: mm: oom: let oom_reap_task and exit_mmap run concurrently This is purely required because exit_aio() may block and exit_mmap() may never start, if the oom_reap_task cannot start running on a mm with mm_users == 0. At the same time if the OOM reaper doesn't wait at all for the memory of the current OOM candidate to be freed by exit_mmap->unmap_vmas, it would generate a spurious OOM kill. If it wasn't because of the exit_aio or similar blocking functions in the last mmput, it would be enough to change the oom_reap_task() in the case it finds mm_users == 0, to wait for a timeout or to wait for __mmput to set MMF_OOM_SKIP itself, but it's not just exit_mmap the problem here so the concurrency of exit_mmap and oom_reap_task is apparently warranted. It's a non standard runtime, exit_mmap() runs without mmap_sem, and oom_reap_task runs with the mmap_sem for reading as usual (kind of MADV_DONTNEED). The race between the two is solved with a combination of tsk_is_oom_victim() (serialized by task_lock) and MMF_OOM_SKIP (serialized by a dummy down_write/up_write cycle on the same lines of the ksm_exit method). If the oom_reap_task() may be running concurrently during exit_mmap, exit_mmap will wait it to finish in down_write (before taking down mm structures that would make the oom_reap_task fail with use after free). If exit_mmap comes first, oom_reap_task() will skip the mm if MMF_OOM_SKIP is already set and in turn all memory is already freed and furthermore the mm data structures may already have been taken down by free_pgtables. [aarcange@redhat.com: incremental one liner] Link: http://lkml.kernel.org/r/20170726164319.GC29716@redhat.com [rientjes@google.com: remove unused mmput_async] Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1708141733130.50317@chino.kir.corp.google.com [aarcange@redhat.com: microoptimization] Link: http://lkml.kernel.org/r/20170817171240.GB5066@redhat.com Link: http://lkml.kernel.org/r/20170726162912.GA29716@redhat.com Fixes: 26db62f179d1 ("oom: keep mm of the killed task available") Signed-off-by: Andrea Arcangeli Signed-off-by: David Rientjes Reported-by: David Rientjes Tested-by: David Rientjes Reviewed-by: Michal Hocko Cc: Tetsuo Handa Cc: Oleg Nesterov Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 2b0a281f9d26..3a19c253bdb1 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -84,12 +84,6 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); -- cgit From c79b57e462b5d2f47afa5f175cf1828f16e18612 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 6 Sep 2017 16:25:04 -0700 Subject: mm: hugetlb: clear target sub-page last when clearing huge page Huge page helps to reduce TLB miss rate, but it has higher cache footprint, sometimes this may cause some issue. For example, when clearing huge page on x86_64 platform, the cache footprint is 2M. But on a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M LLC (last level cache). That is, in average, there are 2.5M LLC for each core and 1.25M LLC for each thread. If the cache pressure is heavy when clearing the huge page, and we clear the huge page from the begin to the end, it is possible that the begin of huge page is evicted from the cache after we finishing clearing the end of the huge page. And it is possible for the application to access the begin of the huge page after clearing the huge page. To help the above situation, in this patch, when we clear a huge page, the order to clear sub-pages is changed. In quite some situation, we can get the address that the application will access after we clear the huge page, for example, in a page fault handler. Instead of clearing the huge page from begin to end, we will clear the sub-pages farthest from the the sub-page to access firstly, and clear the sub-page to access last. This will make the sub-page to access most cache-hot and sub-pages around it more cache-hot too. If we cannot know the address the application will access, the begin of the huge page is assumed to be the the address the application will access. With this patch, the throughput increases ~28.3% in vm-scalability anon-w-seq test case with 72 processes on a 2 socket Xeon E5 v3 2699 system (36 cores, 72 threads). The test case creates 72 processes, each process mmap a big anonymous memory area and writes to it from the begin to the end. For each process, other processes could be seen as other workload which generates heavy cache pressure. At the same time, the cache miss rate reduced from ~33.4% to ~31.7%, the IPC (instruction per cycle) increased from 0.56 to 0.74, and the time spent in user space is reduced ~7.9% Christopher Lameter suggests to clear bytes inside a sub-page from end to begin too. But tests show no visible performance difference in the tests. May because the size of page is small compared with the cache size. Thanks Andi Kleen to propose to use address to access to determine the order of sub-pages to clear. The hugetlbfs access address could be improved, will do that in another patch. [ying.huang@intel.com: improve readability of clear_huge_page()] Link: http://lkml.kernel.org/r/20170830051842.1397-1-ying.huang@intel.com Link: http://lkml.kernel.org/r/20170815014618.15842-1-ying.huang@intel.com Suggested-by: Andi Kleen Signed-off-by: "Huang, Ying" Acked-by: Jan Kara Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Cc: "Kirill A. Shutemov" Cc: Nadia Yvette Chambers Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Minchan Kim Cc: Shaohua Li Cc: Christopher Lameter Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb5e4bc946cc..0acea73af839 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2508,7 +2508,7 @@ enum mf_action_page_type { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, - unsigned long addr, + unsigned long addr_hint, unsigned int pages_per_huge_page); extern void copy_user_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma, -- cgit From df3735c5b40fad8d0d28eb8ab065fe955b3347ee Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 6 Sep 2017 16:25:11 -0700 Subject: x86,mpx: make mpx depend on x86-64 to free up VMA flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm,fork,security: introduce MADV_WIPEONFORK", v4. If a child process accesses memory that was MADV_WIPEONFORK, it will get zeroes. The address ranges are still valid, they are just empty. If a child process accesses memory that was MADV_DONTFORK, it will get a segmentation fault, since those address ranges are no longer valid in the child after fork. Since MADV_DONTFORK also seems to be used to allow very large programs to fork in systems with strict memory overcommit restrictions, changing the semantics of MADV_DONTFORK might break existing programs. The use case is libraries that store or cache information, and want to know that they need to regenerate it in the child process after fork. Examples of this would be: - systemd/pulseaudio API checks (fail after fork) (replacing a getpid check, which is too slow without a PID cache) - PKCS#11 API reinitialization check (mandated by specification) - glibc's upcoming PRNG (reseed after fork) - OpenSSL PRNG (reseed after fork) The security benefits of a forking server having a re-inialized PRNG in every child process are pretty obvious. However, due to libraries having all kinds of internal state, and programs getting compiled with many different versions of each library, it is unreasonable to expect calling programs to re-initialize everything manually after fork. A further complication is the proliferation of clone flags, programs bypassing glibc's functions to call clone directly, and programs calling unshare, causing the glibc pthread_atfork hook to not get called. It would be better to have the kernel take care of this automatically. The patchset also adds MADV_KEEPONFORK, to undo the effects of a prior MADV_WIPEONFORK. This is similar to the OpenBSD minherit syscall with MAP_INHERIT_ZERO: https://man.openbsd.org/minherit.2 This patch (of 2): MPX only seems to be available on 64 bit CPUs, starting with Skylake and Goldmont. Move VM_MPX into the 64 bit only portion of vma->vm_flags, in order to free up a VMA flag. Link: http://lkml.kernel.org/r/20170811212829.29186-2-riel@redhat.com Signed-off-by: Rik van Riel Acked-by: Dave Hansen Cc: Mike Kravetz Cc: Florian Weimer Cc: Kees Cook Cc: Andy Lutomirski Cc: Will Drewry Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Colm MacCártaigh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0acea73af839..9efe62032094 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -208,10 +208,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_1 33 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) +#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #if defined(CONFIG_X86) @@ -235,9 +237,11 @@ extern unsigned int kobjsize(const void *objp); # define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */ #endif -#if defined(CONFIG_X86) +#if defined(CONFIG_X86_INTEL_MPX) /* MPX specific bounds table or bounds directory */ -# define VM_MPX VM_ARCH_2 +# define VM_MPX VM_HIGH_ARCH_BIT_4 +#else +# define VM_MPX VM_NONE #endif #ifndef VM_GROWSUP -- cgit From d2cd9ede6e193dd7d88b6d27399e96229a551b19 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 6 Sep 2017 16:25:15 -0700 Subject: mm,fork: introduce MADV_WIPEONFORK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce MADV_WIPEONFORK semantics, which result in a VMA being empty in the child process after fork. This differs from MADV_DONTFORK in one important way. If a child process accesses memory that was MADV_WIPEONFORK, it will get zeroes. The address ranges are still valid, they are just empty. If a child process accesses memory that was MADV_DONTFORK, it will get a segmentation fault, since those address ranges are no longer valid in the child after fork. Since MADV_DONTFORK also seems to be used to allow very large programs to fork in systems with strict memory overcommit restrictions, changing the semantics of MADV_DONTFORK might break existing programs. MADV_WIPEONFORK only works on private, anonymous VMAs. The use case is libraries that store or cache information, and want to know that they need to regenerate it in the child process after fork. Examples of this would be: - systemd/pulseaudio API checks (fail after fork) (replacing a getpid check, which is too slow without a PID cache) - PKCS#11 API reinitialization check (mandated by specification) - glibc's upcoming PRNG (reseed after fork) - OpenSSL PRNG (reseed after fork) The security benefits of a forking server having a re-inialized PRNG in every child process are pretty obvious. However, due to libraries having all kinds of internal state, and programs getting compiled with many different versions of each library, it is unreasonable to expect calling programs to re-initialize everything manually after fork. A further complication is the proliferation of clone flags, programs bypassing glibc's functions to call clone directly, and programs calling unshare, causing the glibc pthread_atfork hook to not get called. It would be better to have the kernel take care of this automatically. The patch also adds MADV_KEEPONFORK, to undo the effects of a prior MADV_WIPEONFORK. This is similar to the OpenBSD minherit syscall with MAP_INHERIT_ZERO: https://man.openbsd.org/minherit.2 [akpm@linux-foundation.org: numerically order arch/parisc/include/uapi/asm/mman.h #defines] Link: http://lkml.kernel.org/r/20170811212829.29186-3-riel@redhat.com Signed-off-by: Rik van Riel Reported-by: Florian Weimer Reported-by: Colm MacCártaigh Reviewed-by: Mike Kravetz Cc: "H. Peter Anvin" Cc: "Kirill A. Shutemov" Cc: Andy Lutomirski Cc: Dave Hansen Cc: Ingo Molnar Cc: Helge Deller Cc: Kees Cook Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Will Drewry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/trace/events/mmflags.h | 8 +------- include/uapi/asm-generic/mman-common.h | 3 +++ 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9efe62032094..39db8e54c5d5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -189,7 +189,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ -#define VM_ARCH_2 0x02000000 +#define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ #ifdef CONFIG_MEM_SOFT_DIRTY diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 8e50d01c645f..4c2e4737d7bc 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -125,12 +125,6 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) #define __VM_ARCH_SPECIFIC_1 {VM_ARCH_1, "arch_1" } #endif -#if defined(CONFIG_X86) -#define __VM_ARCH_SPECIFIC_2 {VM_MPX, "mpx" } -#else -#define __VM_ARCH_SPECIFIC_2 {VM_ARCH_2, "arch_2" } -#endif - #ifdef CONFIG_MEM_SOFT_DIRTY #define IF_HAVE_VM_SOFTDIRTY(flag,name) {flag, name }, #else @@ -162,7 +156,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ __VM_ARCH_SPECIFIC_1 , \ - __VM_ARCH_SPECIFIC_2 , \ + {VM_WIPEONFORK, "wipeonfork" }, \ {VM_DONTDUMP, "dontdump" }, \ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index d248f3c335b5..203268f9231e 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -58,6 +58,9 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 -- cgit From 50e76632339d4655859523a39249dd95ee5e93e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Sep 2017 11:13:38 +0200 Subject: sched/cpuset/pm: Fix cpuset vs. suspend-resume bugs Cpusets vs. suspend-resume is _completely_ broken. And it got noticed because it now resulted in non-cpuset usage breaking too. On suspend cpuset_cpu_inactive() doesn't call into cpuset_update_active_cpus() because it doesn't want to move tasks about, there is no need, all tasks are frozen and won't run again until after we've resumed everything. But this means that when we finally do call into cpuset_update_active_cpus() after resuming the last frozen cpu in cpuset_cpu_active(), the top_cpuset will not have any difference with the cpu_active_mask and this it will not in fact do _anything_. So the cpuset configuration will not be restored. This was largely hidden because we would unconditionally create identity domains and mobile users would not in fact use cpusets much. And servers what do use cpusets tend to not suspend-resume much. An addition problem is that we'd not in fact wait for the cpuset work to finish before resuming the tasks, allowing spurious migrations outside of the specified domains. Fix the rebuild by introducing cpuset_force_rebuild() and fix the ordering with cpuset_wait_for_hotplug(). Reported-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Tejun Heo Cc: Thomas Gleixner Fixes: deb7aa308ea2 ("cpuset: reorganize CPU / memory hotplug handling") Link: http://lkml.kernel.org/r/20170907091338.orwxrqkbfkki3c24@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index e74655d941b7..a1e6a33a4b03 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -51,7 +51,9 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); +extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); +extern void cpuset_wait_for_hotplug(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -164,11 +166,15 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} +static inline void cpuset_force_rebuild(void) { } + static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_wait_for_hotplug(void) { } + static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { -- cgit From 21d9bb4a05bac50fb4f850517af4030baecd00f6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Sep 2017 11:38:37 +0200 Subject: x86/mm: Make the SME mask a u64 The SME encryption mask is for masking 64-bit pagetable entries. It being an unsigned long works fine on X86_64 but on 32-bit builds in truncates bits leading to Xen guests crashing very early. And regardless, the whole SME mask handling shouldnt've leaked into 32-bit because SME is X86_64-only feature. So, first make the mask u64. And then, add trivial 32-bit versions of the __sme_* macros so that nothing happens there. Reported-and-tested-by: Boris Ostrovsky Tested-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Tom Lendacky Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Fixes: 21729f81ce8a ("x86/mm: Provide general kernel support for memory encryption") Link: http://lkml.kernel.org/r/20170907093837.76zojtkgebwtqc74@pd.tnic Signed-off-by: Ingo Molnar --- include/linux/mem_encrypt.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 1255f09f5e42..265a9cd21cb4 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -21,7 +21,7 @@ #else /* !CONFIG_ARCH_HAS_MEM_ENCRYPT */ -#define sme_me_mask 0UL +#define sme_me_mask 0ULL #endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ @@ -30,18 +30,23 @@ static inline bool sme_active(void) return !!sme_me_mask; } -static inline unsigned long sme_get_me_mask(void) +static inline u64 sme_get_me_mask(void) { return sme_me_mask; } +#ifdef CONFIG_AMD_MEM_ENCRYPT /* * The __sme_set() and __sme_clr() macros are useful for adding or removing * the encryption mask from a value (e.g. when dealing with pagetable * entries). */ -#define __sme_set(x) ((unsigned long)(x) | sme_me_mask) -#define __sme_clr(x) ((unsigned long)(x) & ~sme_me_mask) +#define __sme_set(x) ((x) | sme_me_mask) +#define __sme_clr(x) ((x) & ~sme_me_mask) +#else +#define __sme_set(x) (x) +#define __sme_clr(x) (x) +#endif #endif /* __ASSEMBLY__ */ -- cgit From ca2c1418efe9f7fe37aa1f355efdf4eb293673ce Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 6 Sep 2017 14:44:36 +0200 Subject: udp: drop head states only when all skb references are gone After commit 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") we clear the skb head state as soon as the skb carrying them is first processed. Since the same skb can be processed several times when MSG_PEEK is used, we can end up lacking the required head states, and eventually oopsing. Fix this clearing the skb head state only when processing the last skb reference. Reported-by: Eric Dumazet Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f751f3b93039..72299ef00061 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -958,7 +958,7 @@ void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); void consume_skb(struct sk_buff *skb); -void consume_stateless_skb(struct sk_buff *skb); +void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; -- cgit From e1bf1687740ce1a3598a1c5e452b852ff2190682 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Sep 2017 14:39:51 +0200 Subject: netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to rhashtable" This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1. It was not a good idea. The custom hash table was a much better fit for this purpose. A fast lookup is not essential, in fact for most cases there is no lookup at all because original tuple is not taken and can be used as-is. What needs to be fast is insertion and deletion. rhlist removal however requires a rhlist walk. We can have thousands of entries in such a list if source port/addresses are reused for multiple flows, if this happens removal requests are so expensive that deletions of a few thousand flows can take several seconds(!). The advantages that we got from rhashtable are: 1) table auto-sizing 2) multiple locks 1) would be nice to have, but it is not essential as we have at most one lookup per new flow, so even a million flows in the bysource table are not a problem compared to current deletion cost. 2) is easy to add to custom hash table. I tried to add hlist_node to rhlist to speed up rhltable_remove but this isn't doable without changing semantics. rhltable_remove_fast will check that the to-be-deleted object is part of the table and that requires a list walk that we want to avoid. Furthermore, using hlist_node increases size of struct rhlist_head, which in turn increases nf_conn size. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821 Reported-by: Ivan Babrou Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +-- include/net/netfilter/nf_nat.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index fdc9c64a1c94..8f3bd30511de 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -77,7 +76,7 @@ struct nf_conn { possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) - struct rhlist_head nat_bysource; + struct hlist_node nat_bysource; #endif /* all members below initialized via memset */ u8 __nfct_init_offset[0]; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 05c82a1a4267..b71701302e61 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,6 +1,5 @@ #ifndef _NF_NAT_H #define _NF_NAT_H -#include #include #include #include -- cgit From b5ff8161e37cef3265e186ecded23324e4dc2973 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 Sep 2017 16:10:49 -0700 Subject: mm: thp: introduce separate TTU flag for thp freezing TTU_MIGRATION is used to convert pte into migration entry until thp split completes. This behavior conflicts with thp migration added later patches, so let's introduce a new TTU flag specifically for freezing. try_to_unmap() is used both for thp split (via freeze_page()) and page migration (via __unmap_and_move()). In freeze_page(), ttu_flag given for head page is like below (assuming anonymous thp): (TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | \ TTU_MIGRATION | TTU_SPLIT_HUGE_PMD) and ttu_flag given for tail pages is: (TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED | \ TTU_MIGRATION) __unmap_and_move() calls try_to_unmap() with ttu_flag: (TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS) Now I'm trying to insert a branch for thp migration at the top of try_to_unmap_one() like below static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, unsigned long address, void *arg) { ... /* PMD-mapped THP migration entry */ if (!pvmw.pte && (flags & TTU_MIGRATION)) { if (!PageAnon(page)) continue; set_pmd_migration_entry(&pvmw, page); continue; } ... } so try_to_unmap() for tail pages called by thp split can go into thp migration code path (which converts *pmd* into migration entry), while the expectation is to freeze thp (which converts *pte* into migration entry.) I detected this failure as a "bad page state" error in a testcase where split_huge_page() is called from queue_pages_pte_range(). Link: http://lkml.kernel.org/r/20170717193955.20207-4-zi.yan@sent.com Signed-off-by: Naoya Horiguchi Signed-off-by: Zi Yan Acked-by: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Mel Gorman Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 43ef2c30cb0f..f8ca2e74b819 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -93,8 +93,9 @@ enum ttu_flags { TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ - TTU_RMAP_LOCKED = 0x80 /* do not grab rmap lock: + TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock: * caller holds it */ + TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */ }; #ifdef CONFIG_MMU -- cgit From 9c670ea37947a82cb6d4df69139f7e46ed71a0ac Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 Sep 2017 16:10:53 -0700 Subject: mm: thp: introduce CONFIG_ARCH_ENABLE_THP_MIGRATION Introduce CONFIG_ARCH_ENABLE_THP_MIGRATION to limit thp migration functionality to x86_64, which should be safer at the first step. Link: http://lkml.kernel.org/r/20170717193955.20207-5-zi.yan@sent.com Signed-off-by: Naoya Horiguchi Signed-off-by: Zi Yan Reviewed-by: Anshuman Khandual Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ee696347f928..d8f35a0865dc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -233,6 +233,11 @@ void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) +static inline bool thp_migration_supported(void) +{ + return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -336,6 +341,11 @@ static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, { return NULL; } + +static inline bool thp_migration_supported(void) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ -- cgit From 616b8371539a6c487404c3b8fb04078016dab4ba Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 8 Sep 2017 16:10:57 -0700 Subject: mm: thp: enable thp migration in generic path Add thp migration's core code, including conversions between a PMD entry and a swap entry, setting PMD migration entry, removing PMD migration entry, and waiting on PMD migration entries. This patch makes it possible to support thp migration. If you fail to allocate a destination page as a thp, you just split the source thp as we do now, and then enter the normal page migration. If you succeed to allocate destination thp, you enter thp migration. Subsequent patches actually enable thp migration for each caller of page migration by allowing its get_new_page() callback to allocate thps. [zi.yan@cs.rutgers.edu: fix gcc-4.9.0 -Wmissing-braces warning] Link: http://lkml.kernel.org/r/A0ABA698-7486-46C3-B209-E95A9048B22C@cs.rutgers.edu [akpm@linux-foundation.org: fix x86_64 allnoconfig warning] Signed-off-by: Zi Yan Acked-by: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Mel Gorman Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5ff7b217ee6..82089fd88c29 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -103,7 +103,8 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { - BUG_ON(!PageLocked(page)); + BUG_ON(!PageLocked(compound_head(page))); + return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ, page_to_pfn(page)); } @@ -126,7 +127,7 @@ static inline struct page *migration_entry_to_page(swp_entry_t entry) * Any use of migration entries may only occur while the * corresponding page is locked */ - BUG_ON(!PageLocked(p)); + BUG_ON(!PageLocked(compound_head(p))); return p; } @@ -148,7 +149,11 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } -#define migration_entry_to_page(swp) NULL +static inline struct page *migration_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } @@ -163,6 +168,68 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +struct page_vma_mapped_walk; + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page); + +extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new); + +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + swp_entry_t arch_entry; + + arch_entry = __pmd_to_swp_entry(pmd); + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + swp_entry_t arch_entry; + + arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); + return __swp_entry_to_pmd(arch_entry); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); +} +#else +static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + BUILD_BUG(); +} + +static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new) +{ + BUILD_BUG(); +} + +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + return swp_entry(0, 0); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + return __pmd(0); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return 0; +} +#endif + #ifdef CONFIG_MEMORY_FAILURE extern atomic_long_t num_poisoned_pages __read_mostly; -- cgit From 84c3fc4e9c563d8fb91cfdf5948da48fe1af34d3 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 8 Sep 2017 16:11:01 -0700 Subject: mm: thp: check pmd migration entry in common path When THP migration is being used, memory management code needs to handle pmd migration entries properly. This patch uses !pmd_present() or is_swap_pmd() (depending on whether pmd_none() needs separate code or not) to check pmd migration entries at the places where a pmd entry is present. Since pmd-related code uses split_huge_page(), split_huge_pmd(), pmd_trans_huge(), pmd_trans_unstable(), or pmd_none_or_trans_huge_or_clear_bad(), this patch: 1. adds pmd migration entry split code in split_huge_pmd(), 2. takes care of pmd migration entries whenever pmd_trans_huge() is present, 3. makes pmd_none_or_trans_huge_or_clear_bad() pmd migration entry aware. Since split_huge_page() uses split_huge_pmd() and pmd_trans_unstable() is equivalent to pmd_none_or_trans_huge_or_clear_bad(), we do not change them. Until this commit, a pmd entry should be: 1. pointing to a pte page, 2. is_swap_pmd(), 3. pmd_trans_huge(), 4. pmd_devmap(), or 5. pmd_none(). Signed-off-by: Zi Yan Cc: Kirill A. Shutemov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Mel Gorman Cc: Minchan Kim Cc: Naoya Horiguchi Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 18 +++++++++++++++++- include/linux/huge_mm.h | 14 ++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d7bb98f4134..4f93a6d10a47 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -846,7 +846,23 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE barrier(); #endif - if (pmd_none(pmdval) || pmd_trans_huge(pmdval)) + /* + * !pmd_present() checks for pmd migration entries + * + * The complete check uses is_pmd_migration_entry() in linux/swapops.h + * But using that requires moving current function and pmd_trans_unstable() + * to linux/swapops.h to resovle dependency, which is too much code move. + * + * !pmd_present() is equivalent to is_pmd_migration_entry() currently, + * because !pmd_present() pages can only be under migration not swapped + * out. + * + * pmd_none() is preseved for future condition checks on pmd migration + * entries and not confusing with this function name, although it is + * redundant with !pmd_present(). + */ + if (pmd_none(pmdval) || pmd_trans_huge(pmdval) || + (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval))) return 1; if (unlikely(pmd_bad(pmdval))) { pmd_clear_bad(pmd); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index d8f35a0865dc..14bc21c2ee7f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -147,7 +147,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (pmd_trans_huge(*____pmd) \ + if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ @@ -178,12 +178,18 @@ extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); extern spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); + +static inline int is_swap_pmd(pmd_t pmd) +{ + return !pmd_none(pmd) && !pmd_present(pmd); +} + /* mmap_sem must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -299,6 +305,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, long adjust_next) { } +static inline int is_swap_pmd(pmd_t pmd) +{ + return 0; +} static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { -- cgit From ab6e3d0939bb332d72444a532f0f72e0dfde7b7b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 Sep 2017 16:11:04 -0700 Subject: mm: soft-dirty: keep soft-dirty bits over thp migration Soft dirty bit is designed to keep tracked over page migration. This patch makes it work in the same manner for thp migration too. Signed-off-by: Naoya Horiguchi Signed-off-by: Zi Yan Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 34 +++++++++++++++++++++++++++++++++- include/linux/swapops.h | 2 ++ 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4f93a6d10a47..8e0243036564 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -630,7 +630,24 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_start_context_switch(prev) do {} while (0) #endif -#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} +#endif +#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ static inline int pte_soft_dirty(pte_t pte) { return 0; @@ -675,6 +692,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } + +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 82089fd88c29..45b092aa6419 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -183,6 +183,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) { swp_entry_t arch_entry; + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); arch_entry = __pmd_to_swp_entry(pmd); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } -- cgit From 8135d8926c08e553e39b0b040c6d01f0daef0676 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 8 Sep 2017 16:11:15 -0700 Subject: mm: memory_hotplug: memory hotremove supports thp migration This patch enables thp migration for memory hotremove. Link: http://lkml.kernel.org/r/20170717193955.20207-11-zi.yan@sent.com Signed-off-by: Naoya Horiguchi Signed-off-by: Zi Yan Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Dave Hansen Cc: David Nellans Cc: Ingo Molnar Cc: Kirill A. Shutemov Cc: Mel Gorman Cc: Minchan Kim Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3e0d405dc842..ce15989521a1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -35,15 +35,28 @@ static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + unsigned int order = 0; + struct page *new_page = NULL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), preferred_nid, nodemask); + if (thp_migration_supported() && PageTransHuge(page)) { + order = HPAGE_PMD_ORDER; + gfp_mask |= GFP_TRANSHUGE; + } + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask); + new_page = __alloc_pages_nodemask(gfp_mask, order, + preferred_nid, nodemask); + + if (new_page && PageTransHuge(page)) + prep_transhuge_page(new_page); + + return new_page; } #ifdef CONFIG_MIGRATION -- cgit From 133ff0eac95b7dc6edf89dc51bd139a0630bbae7 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:23 -0700 Subject: mm/hmm: heterogeneous memory management (HMM for short) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HMM provides 3 separate types of functionality: - Mirroring: synchronize CPU page table and device page table - Device memory: allocating struct page for device memory - Migration: migrating regular memory to device memory This patch introduces some common helpers and definitions to all of those 3 functionality. Link: http://lkml.kernel.org/r/20170817000548.32038-3-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 152 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 6 ++ 2 files changed, 158 insertions(+) create mode 100644 include/linux/hmm.h (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h new file mode 100644 index 000000000000..5d83fec6dfdd --- /dev/null +++ b/include/linux/hmm.h @@ -0,0 +1,152 @@ +/* + * Copyright 2013 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Jérôme Glisse + */ +/* + * Heterogeneous Memory Management (HMM) + * + * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it + * is for. Here we focus on the HMM API description, with some explanation of + * the underlying implementation. + * + * Short description: HMM provides a set of helpers to share a virtual address + * space between CPU and a device, so that the device can access any valid + * address of the process (while still obeying memory protection). HMM also + * provides helpers to migrate process memory to device memory, and back. Each + * set of functionality (address space mirroring, and migration to and from + * device memory) can be used independently of the other. + * + * + * HMM address space mirroring API: + * + * Use HMM address space mirroring if you want to mirror range of the CPU page + * table of a process into a device page table. Here, "mirror" means "keep + * synchronized". Prerequisites: the device must provide the ability to write- + * protect its page tables (at PAGE_SIZE granularity), and must be able to + * recover from the resulting potential page faults. + * + * HMM guarantees that at any point in time, a given virtual address points to + * either the same memory in both CPU and device page tables (that is: CPU and + * device page tables each point to the same pages), or that one page table (CPU + * or device) points to no entry, while the other still points to the old page + * for the address. The latter case happens when the CPU page table update + * happens first, and then the update is mirrored over to the device page table. + * This does not cause any issue, because the CPU page table cannot start + * pointing to a new page until the device page table is invalidated. + * + * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any + * updates to each device driver that has registered a mirror. It also provides + * some API calls to help with taking a snapshot of the CPU page table, and to + * synchronize with any updates that might happen concurrently. + * + * + * HMM migration to and from device memory: + * + * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with + * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page + * of the device memory, and allows the device driver to manage its memory + * using those struct pages. Having struct pages for device memory makes + * migration easier. Because that memory is not addressable by the CPU it must + * never be pinned to the device; in other words, any CPU page fault can always + * cause the device memory to be migrated (copied/moved) back to regular memory. + * + * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that + * allows use of a device DMA engine to perform the copy operation between + * regular system memory and device memory. + */ +#ifndef LINUX_HMM_H +#define LINUX_HMM_H + +#include + +#if IS_ENABLED(CONFIG_HMM) + + +/* + * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page + * + * Flags: + * HMM_PFN_VALID: pfn is valid + * HMM_PFN_WRITE: CPU page table has write permission set + */ +typedef unsigned long hmm_pfn_t; + +#define HMM_PFN_VALID (1 << 0) +#define HMM_PFN_WRITE (1 << 1) +#define HMM_PFN_SHIFT 2 + +/* + * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t + * @pfn: hmm_pfn_t to convert to struct page + * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise + * + * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page + * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL. + */ +static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return NULL; + return pfn_to_page(pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t + * @pfn: hmm_pfn_t to extract pfn from + * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise + */ +static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return -1UL; + return (pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page + * @page: struct page pointer for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the page + */ +static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page) +{ + return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + +/* + * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn + * @pfn: pfn value for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the pfn + */ +static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) +{ + return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + + +/* Below are for HMM internal use only! Not to be used by device driver! */ +void hmm_mm_destroy(struct mm_struct *mm); + +static inline void hmm_mm_init(struct mm_struct *mm) +{ + mm->hmm = NULL; +} + +#else /* IS_ENABLED(CONFIG_HMM) */ + +/* Below are for HMM internal use only! Not to be used by device driver! */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} + +#endif /* IS_ENABLED(CONFIG_HMM) */ +#endif /* LINUX_HMM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f45ad815b7d7..46f4ecf5479a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ struct address_space; struct mem_cgroup; +struct hmm; /* * Each physical page in the system has a struct page associated with @@ -503,6 +504,11 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; + +#if IS_ENABLED(CONFIG_HMM) + /* HMM needs to track a few things per mm */ + struct hmm *hmm; +#endif } __randomize_layout; extern struct mm_struct init_mm; -- cgit From c0b124054f9e42eb6da545a10fe9122a7d7c3f72 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:27 -0700 Subject: mm/hmm/mirror: mirror process address space on device with HMM helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a heterogeneous memory management (HMM) process address space mirroring. In a nutshell this provide an API to mirror process address space on a device. This boils down to keeping CPU and device page table synchronize (we assume that both device and CPU are cache coherent like PCIe device can be). This patch provide a simple API for device driver to achieve address space mirroring thus avoiding each device driver to grow its own CPU page table walker and its own CPU page table synchronization mechanism. This is useful for NVidia GPU >= Pascal, Mellanox IB >= mlx5 and more hardware in the future. [jglisse@redhat.com: fix hmm for "mmu_notifier kill invalidate_page callback"] Link: http://lkml.kernel.org/r/20170830231955.GD9445@redhat.com Link: http://lkml.kernel.org/r/20170817000548.32038-4-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 5d83fec6dfdd..76310726ee9c 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -72,6 +72,7 @@ #if IS_ENABLED(CONFIG_HMM) +struct hmm; /* * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page @@ -134,6 +135,115 @@ static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) } +#if IS_ENABLED(CONFIG_HMM_MIRROR) +/* + * Mirroring: how to synchronize device page table with CPU page table. + * + * A device driver that is participating in HMM mirroring must always + * synchronize with CPU page table updates. For this, device drivers can either + * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device + * drivers can decide to register one mirror per device per process, or just + * one mirror per process for a group of devices. The pattern is: + * + * int device_bind_address_space(..., struct mm_struct *mm, ...) + * { + * struct device_address_space *das; + * + * // Device driver specific initialization, and allocation of das + * // which contains an hmm_mirror struct as one of its fields. + * ... + * + * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops); + * if (ret) { + * // Cleanup on error + * return ret; + * } + * + * // Other device driver specific initialization + * ... + * } + * + * Once an hmm_mirror is registered for an address space, the device driver + * will get callbacks through sync_cpu_device_pagetables() operation (see + * hmm_mirror_ops struct). + * + * Device driver must not free the struct containing the hmm_mirror struct + * before calling hmm_mirror_unregister(). The expected usage is to do that when + * the device driver is unbinding from an address space. + * + * + * void device_unbind_address_space(struct device_address_space *das) + * { + * // Device driver specific cleanup + * ... + * + * hmm_mirror_unregister(&das->mirror); + * + * // Other device driver specific cleanup, and now das can be freed + * ... + * } + */ + +struct hmm_mirror; + +/* + * enum hmm_update_type - type of update + * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) + */ +enum hmm_update_type { + HMM_UPDATE_INVALIDATE, +}; + +/* + * struct hmm_mirror_ops - HMM mirror device operations callback + * + * @update: callback to update range on a device + */ +struct hmm_mirror_ops { + /* sync_cpu_device_pagetables() - synchronize page tables + * + * @mirror: pointer to struct hmm_mirror + * @update_type: type of update that occurred to the CPU page table + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * + * This callback ultimately originates from mmu_notifiers when the CPU + * page table is updated. The device driver must update its page table + * in response to this callback. The update argument tells what action + * to perform. + * + * The device driver must not return from this callback until the device + * page tables are completely updated (TLBs flushed, etc); this is a + * synchronous call. + */ + void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + enum hmm_update_type update_type, + unsigned long start, + unsigned long end); +}; + +/* + * struct hmm_mirror - mirror struct for a device driver + * + * @hmm: pointer to struct hmm (which is unique per mm_struct) + * @ops: device driver callback for HMM mirror operations + * @list: for list of mirrors of a given mm + * + * Each address space (mm_struct) being mirrored by a device must register one + * instance of an hmm_mirror struct with HMM. HMM will track the list of all + * mirrors for each mm_struct. + */ +struct hmm_mirror { + struct hmm *hmm; + const struct hmm_mirror_ops *ops; + struct list_head list; +}; + +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); +void hmm_mirror_unregister(struct hmm_mirror *mirror); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + /* Below are for HMM internal use only! Not to be used by device driver! */ void hmm_mm_destroy(struct mm_struct *mm); -- cgit From da4c3c735ea4dcc2a0b0ff0bd4803c336361b6f5 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:31 -0700 Subject: mm/hmm/mirror: helper to snapshot CPU page table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This does not use existing page table walker because we want to share same code for our page fault handler. Link: http://lkml.kernel.org/r/20170817000548.32038-5-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 76310726ee9c..62899c9829c9 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -79,13 +79,26 @@ struct hmm; * * Flags: * HMM_PFN_VALID: pfn is valid + * HMM_PFN_READ: CPU page table has read permission set * HMM_PFN_WRITE: CPU page table has write permission set + * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory + * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() + * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the + * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not + * be mirrored by a device, because the entry will never have HMM_PFN_VALID + * set and the pfn value is undefined. + * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE) */ typedef unsigned long hmm_pfn_t; #define HMM_PFN_VALID (1 << 0) -#define HMM_PFN_WRITE (1 << 1) -#define HMM_PFN_SHIFT 2 +#define HMM_PFN_READ (1 << 1) +#define HMM_PFN_WRITE (1 << 2) +#define HMM_PFN_ERROR (1 << 3) +#define HMM_PFN_EMPTY (1 << 4) +#define HMM_PFN_SPECIAL (1 << 5) +#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6) +#define HMM_PFN_SHIFT 7 /* * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t @@ -241,6 +254,44 @@ struct hmm_mirror { int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); void hmm_mirror_unregister(struct hmm_mirror *mirror); + + +/* + * struct hmm_range - track invalidation lock on virtual address range + * + * @list: all range lock are on a list + * @start: range virtual start address (inclusive) + * @end: range virtual end address (exclusive) + * @pfns: array of pfns (big enough for the range) + * @valid: pfns array did not change since it has been fill by an HMM function + */ +struct hmm_range { + struct list_head list; + unsigned long start; + unsigned long end; + hmm_pfn_t *pfns; + bool valid; +}; + +/* + * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device + * driver lock that serializes device page table updates, then call + * hmm_vma_range_done(), to check if the snapshot is still valid. The same + * device driver page table update lock must also be used in the + * hmm_mirror_ops.sync_cpu_device_pagetables() callback, so that CPU page + * table invalidation serializes on it. + * + * YOU MUST CALL hmm_vma_range_done() ONCE AND ONLY ONCE EACH TIME YOU CALL + * hmm_vma_get_pfns() WITHOUT ERROR ! + * + * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID ! + */ +int hmm_vma_get_pfns(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns); +bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit From 74eee180b935fcb9b83a56dd7648fb75caf38f0e Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:35 -0700 Subject: mm/hmm/mirror: device page fault handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This handles page fault on behalf of device driver, unlike handle_mm_fault() it does not trigger migration back to system memory for device memory. Link: http://lkml.kernel.org/r/20170817000548.32038-6-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 62899c9829c9..61a6535fe438 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -292,6 +292,33 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma, unsigned long end, hmm_pfn_t *pfns); bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); + + +/* + * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will + * not migrate any device memory back to system memory. The hmm_pfn_t array will + * be updated with the fault result and current snapshot of the CPU page table + * for the range. + * + * The mmap_sem must be taken in read mode before entering and it might be + * dropped by the function if the block argument is false. In that case, the + * function returns -EAGAIN. + * + * Return value does not reflect if the fault was successful for every single + * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to + * determine fault status for each address. + * + * Trying to fault inside an invalid vma will result in -EINVAL. + * + * See the function description in mm/hmm.c for further documentation. + */ +int hmm_vma_fault(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns, + bool write, + bool block); #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -- cgit From 3072e413e305e353cd4654f8a57d953b66e85bf3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 8 Sep 2017 16:11:39 -0700 Subject: mm/memory_hotplug: introduce add_pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are new users of memory hotplug emerging. Some of them require different subset of arch_add_memory. There are some which only require allocation of struct pages without mapping those pages to the kernel address space. We currently have __add_pages for that purpose. But this is rather lowlevel and not very suitable for the code outside of the memory hotplug. E.g. x86_64 wants to update max_pfn which should be done by the caller. Introduce add_pages() which should care about those details if they are needed. Each architecture should define its implementation and select CONFIG_ARCH_HAS_ADD_PAGES. All others use the currently existing __add_pages. Link: http://lkml.kernel.org/r/20170817000548.32038-7-jglisse@redhat.com Signed-off-by: Michal Hocko Signed-off-by: Jérôme Glisse Acked-by: Balbir Singh Cc: Aneesh Kumar Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5e6e4cc36ff4..0995e1a2b458 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -133,6 +133,17 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, bool want_memblock); +#ifndef CONFIG_ARCH_HAS_ADD_PAGES +static inline int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock) +{ + return __add_pages(nid, start_pfn, nr_pages, want_memblock); +} +#else /* ARCH_HAS_ADD_PAGES */ +int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock); +#endif /* ARCH_HAS_ADD_PAGES */ + #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); #else -- cgit From 5042db43cc26f51eed51c56192e2c2317e44315f Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:43 -0700 Subject: mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HMM (heterogeneous memory management) need struct page to support migration from system main memory to device memory. Reasons for HMM and migration to device memory is explained with HMM core patch. This patch deals with device memory that is un-addressable memory (ie CPU can not access it). Hence we do not want those struct page to be manage like regular memory. That is why we extend ZONE_DEVICE to support different types of memory. A persistent memory type is define for existing user of ZONE_DEVICE and a new device un-addressable type is added for the un-addressable memory type. There is a clear separation between what is expected from each memory type and existing user of ZONE_DEVICE are un-affected by new requirement and new use of the un-addressable type. All specific code path are protect with test against the memory type. Because memory is un-addressable we use a new special swap type for when a page is migrated to device memory (this reduces the number of maximum swap file). The main two additions beside memory type to ZONE_DEVICE is two callbacks. First one, page_free() is call whenever page refcount reach 1 (which means the page is free as ZONE_DEVICE page never reach a refcount of 0). This allow device driver to manage its memory and associated struct page. The second callback page_fault() happens when there is a CPU access to an address that is back by a device page (which are un-addressable by the CPU). This callback is responsible to migrate the page back to system main memory. Device driver can not block migration back to system memory, HMM make sure that such page can not be pin into device memory. If device is in some error condition and can not migrate memory back then a CPU page fault to device memory should end with SIGBUS. [arnd@arndb.de: fix warning] Link: http://lkml.kernel.org/r/20170823133213.712917-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170817000548.32038-8-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Arnd Bergmann Acked-by: Dan Williams Cc: Ross Zwisler Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 1 + include/linux/memremap.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 12 ++++++++ include/linux/swap.h | 24 ++++++++++++++-- include/linux/swapops.h | 68 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 175 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6230064d7f95..3a4f69137bc2 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -130,6 +130,7 @@ enum { IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, + IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, }; /* helpers to define resources */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 93416196ba64..8e164ec9eed0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -4,6 +4,8 @@ #include #include +#include + struct resource; struct device; @@ -35,18 +37,89 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) } #endif +/* + * Specialize ZONE_DEVICE memory into multiple types each having differents + * usage. + * + * MEMORY_DEVICE_HOST: + * Persistent device memory (pmem): struct page might be allocated in different + * memory and architecture might want to perform special actions. It is similar + * to regular memory, in that the CPU can access it transparently. However, + * it is likely to have different bandwidth and latency than regular memory. + * See Documentation/nvdimm/nvdimm.txt for more information. + * + * MEMORY_DEVICE_PRIVATE: + * Device memory that is not directly addressable by the CPU: CPU can neither + * read nor write private memory. In this case, we do still have struct pages + * backing the device memory. Doing so simplifies the implementation, but it is + * important to remember that there are certain points at which the struct page + * must be treated as an opaque object, rather than a "normal" struct page. + * + * A more complete discussion of unaddressable memory may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. + */ +enum memory_type { + MEMORY_DEVICE_HOST = 0, + MEMORY_DEVICE_PRIVATE, +}; + +/* + * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two + * callbacks: + * page_fault() + * page_free() + * + * Additional notes about MEMORY_DEVICE_PRIVATE may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief + * explanation in include/linux/memory_hotplug.h. + * + * The page_fault() callback must migrate page back, from device memory to + * system memory, so that the CPU can access it. This might fail for various + * reasons (device issues, device have been unplugged, ...). When such error + * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and + * set the CPU page table entry to "poisoned". + * + * Note that because memory cgroup charges are transferred to the device memory, + * this should never fail due to memory restrictions. However, allocation + * of a regular system page might still fail because we are out of memory. If + * that happens, the page_fault() callback must return VM_FAULT_OOM. + * + * The page_fault() callback can also try to migrate back multiple pages in one + * chunk, as an optimization. It must, however, prioritize the faulting address + * over all the others. + * + * + * The page_free() callback is called once the page refcount reaches 1 + * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. + * This allows the device driver to implement its own memory management.) + */ +typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +typedef void (*dev_page_free_t)(struct page *page, void *data); + /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @page_fault: callback when CPU fault on an unaddressable device page + * @page_free: free page callback when page refcount reaches 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @dev: host device of the mapping for debug + * @data: private data pointer for page_free() + * @type: memory type: see MEMORY_* in memory_hotplug.h */ struct dev_pagemap { + dev_page_fault_t page_fault; + dev_page_free_t page_free; struct vmem_altmap *altmap; const struct resource *res; struct percpu_ref *ref; struct device *dev; + void *data; + enum memory_type type; }; #ifdef CONFIG_ZONE_DEVICE diff --git a/include/linux/mm.h b/include/linux/mm.h index 39db8e54c5d5..a74c4e954352 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -792,11 +792,23 @@ static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } + +static inline bool is_device_private_page(const struct page *page) +{ + /* See MEMORY_DEVICE_PRIVATE in include/linux/memory_hotplug.h */ + return ((page_zonenum(page) == ZONE_DEVICE) && + (page->pgmap->type == MEMORY_DEVICE_PRIVATE)); +} #else static inline bool is_zone_device_page(const struct page *page) { return false; } + +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} #endif static inline void get_page(struct page *page) diff --git a/include/linux/swap.h b/include/linux/swap.h index 8bf3487fb204..8a807292037f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -50,6 +50,23 @@ static inline int current_is_kswapd(void) * actions on faults. */ +/* + * Unaddressable device memory support. See include/linux/hmm.h and + * Documentation/vm/hmm.txt. Short description is we need struct pages for + * device memory that is unaddressable (inaccessible) by CPU, so that we can + * migrate part of a process memory to device memory. + * + * When a page is migrated from CPU to device, we set the CPU page table entry + * to a special SWP_DEVICE_* entry. + */ +#ifdef CONFIG_DEVICE_PRIVATE +#define SWP_DEVICE_NUM 2 +#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) +#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) +#else +#define SWP_DEVICE_NUM 0 +#endif + /* * NUMA node memory migration support */ @@ -72,7 +89,8 @@ static inline int current_is_kswapd(void) #endif #define MAX_SWAPFILES \ - ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) /* * Magic header for a swap area. The first part of the union is @@ -469,8 +487,8 @@ static inline void show_swap_cache_info(void) { } -#define free_swap_and_cache(swp) is_migration_entry(swp) -#define swapcache_prepare(swp) is_migration_entry(swp) +#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) +#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 45b092aa6419..291c4b534658 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -100,6 +100,74 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); } +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ, + page_to_pfn(page)); +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + int type = swp_type(entry); + return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ + *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry)); +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return pfn_to_page(swp_offset(entry)); +} + +int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp); +#else /* CONFIG_DEVICE_PRIVATE */ +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(0, 0); +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + +static inline int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp) +{ + return VM_FAULT_SIGBUS; +} +#endif /* CONFIG_DEVICE_PRIVATE */ + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { -- cgit From 7b2d55d2c8961ae9d456d3133f4ae2f0fbd3e14f Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:46 -0700 Subject: mm/ZONE_DEVICE: special case put_page() for device private pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A ZONE_DEVICE page that reach a refcount of 1 is free ie no longer have any user. For device private pages this is important to catch and thus we need to special case put_page() for this. Link: http://lkml.kernel.org/r/20170817000548.32038-9-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Kirill A. Shutemov Cc: Dan Williams Cc: Ross Zwisler Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memremap.h | 13 +++++++++++++ include/linux/mm.h | 31 ++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 8e164ec9eed0..8aa6b82679e2 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -126,6 +126,14 @@ struct dev_pagemap { void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); + +static inline bool is_zone_device_page(const struct page *page); + +static inline bool is_device_private_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -144,6 +152,11 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { return NULL; } + +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} #endif /** diff --git a/include/linux/mm.h b/include/linux/mm.h index a74c4e954352..eccdab4bb44a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -23,6 +23,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -792,25 +793,25 @@ static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } - -static inline bool is_device_private_page(const struct page *page) -{ - /* See MEMORY_DEVICE_PRIVATE in include/linux/memory_hotplug.h */ - return ((page_zonenum(page) == ZONE_DEVICE) && - (page->pgmap->type == MEMORY_DEVICE_PRIVATE)); -} #else static inline bool is_zone_device_page(const struct page *page) { return false; } +#endif -static inline bool is_device_private_page(const struct page *page) +#ifdef CONFIG_DEVICE_PRIVATE +void put_zone_device_private_page(struct page *page); +#else +static inline void put_zone_device_private_page(struct page *page) { - return false; } #endif +static inline bool is_device_private_page(const struct page *page); + +DECLARE_STATIC_KEY_FALSE(device_private_key); + static inline void get_page(struct page *page) { page = compound_head(page); @@ -826,6 +827,18 @@ static inline void put_page(struct page *page) { page = compound_head(page); + /* + * For private device pages we need to catch refcount transition from + * 2 to 1, when refcount reach one it means the private device page is + * free and we need to inform the device driver through callback. See + * include/linux/memremap.h and HMM for details. + */ + if (static_branch_unlikely(&device_private_key) && + unlikely(is_device_private_page(page))) { + put_zone_device_private_page(page); + return; + } + if (put_page_testzero(page)) __put_page(page); } -- cgit From 4ef589dc9b10cdcae75a2b2b0e9b2c5e8a92c378 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:11:58 -0700 Subject: mm/hmm/devmem: device memory hotplug using ZONE_DEVICE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduce a simple struct and associated helpers for device driver to use when hotpluging un-addressable device memory as ZONE_DEVICE. It will find a unuse physical address range and trigger memory hotplug for it which allocates and initialize struct page for the device memory. Device driver should use this helper during device initialization to hotplug the device memory. It should only need to remove the memory once the device is going offline (shutdown or hotremove). There should not be any userspace API to hotplug memory expect maybe for host device driver to allow to add more memory to a guest device driver. Device's memory is manage by the device driver and HMM only provides helpers to that effect. Link: http://lkml.kernel.org/r/20170817000548.32038-12-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Signed-off-by: Balbir Singh Cc: Aneesh Kumar Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 61a6535fe438..16f916b437cc 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -72,6 +72,11 @@ #if IS_ENABLED(CONFIG_HMM) +#include +#include +#include + + struct hmm; /* @@ -322,6 +327,156 @@ int hmm_vma_fault(struct vm_area_struct *vma, #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +struct hmm_devmem; + +struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, + unsigned long addr); + +/* + * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events + * + * @free: call when refcount on page reach 1 and thus is no longer use + * @fault: call when there is a page fault to unaddressable memory + * + * Both callback happens from page_free() and page_fault() callback of struct + * dev_pagemap respectively. See include/linux/memremap.h for more details on + * those. + * + * The hmm_devmem_ops callback are just here to provide a coherent and + * uniq API to device driver and device driver should not register their + * own page_free() or page_fault() but rely on the hmm_devmem_ops call- + * back. + */ +struct hmm_devmem_ops { + /* + * free() - free a device page + * @devmem: device memory structure (see struct hmm_devmem) + * @page: pointer to struct page being freed + * + * Call back occurs whenever a device page refcount reach 1 which + * means that no one is holding any reference on the page anymore + * (ZONE_DEVICE page have an elevated refcount of 1 as default so + * that they are not release to the general page allocator). + * + * Note that callback has exclusive ownership of the page (as no + * one is holding any reference). + */ + void (*free)(struct hmm_devmem *devmem, struct page *page); + /* + * fault() - CPU page fault or get user page (GUP) + * @devmem: device memory structure (see struct hmm_devmem) + * @vma: virtual memory area containing the virtual address + * @addr: virtual address that faulted or for which there is a GUP + * @page: pointer to struct page backing virtual address (unreliable) + * @flags: FAULT_FLAG_* (see include/linux/mm.h) + * @pmdp: page middle directory + * Returns: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR + * on error + * + * The callback occurs whenever there is a CPU page fault or GUP on a + * virtual address. This means that the device driver must migrate the + * page back to regular memory (CPU accessible). + * + * The device driver is free to migrate more than one page from the + * fault() callback as an optimization. However if device decide to + * migrate more than one page it must always priotirize the faulting + * address over the others. + * + * The struct page pointer is only given as an hint to allow quick + * lookup of internal device driver data. A concurrent migration + * might have already free that page and the virtual address might + * not longer be back by it. So it should not be modified by the + * callback. + * + * Note that mmap semaphore is held in read mode at least when this + * callback occurs, hence the vma is valid upon callback entry. + */ + int (*fault)(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +}; + +/* + * struct hmm_devmem - track device memory + * + * @completion: completion object for device memory + * @pfn_first: first pfn for this resource (set by hmm_devmem_add()) + * @pfn_last: last pfn for this resource (set by hmm_devmem_add()) + * @resource: IO resource reserved for this chunk of memory + * @pagemap: device page map for that chunk + * @device: device to bind resource to + * @ops: memory operations callback + * @ref: per CPU refcount + * + * This an helper structure for device drivers that do not wish to implement + * the gory details related to hotplugging new memoy and allocating struct + * pages. + * + * Device drivers can directly use ZONE_DEVICE memory on their own if they + * wish to do so. + */ +struct hmm_devmem { + struct completion completion; + unsigned long pfn_first; + unsigned long pfn_last; + struct resource *resource; + struct device *device; + struct dev_pagemap pagemap; + const struct hmm_devmem_ops *ops; + struct percpu_ref ref; +}; + +/* + * To add (hotplug) device memory, HMM assumes that there is no real resource + * that reserves a range in the physical address space (this is intended to be + * use by unaddressable device memory). It will reserve a physical range big + * enough and allocate struct page for it. + * + * The device driver can wrap the hmm_devmem struct inside a private device + * driver struct. The device driver must call hmm_devmem_remove() before the + * device goes away and before freeing the hmm_devmem struct memory. + */ +struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, + struct device *device, + unsigned long size); +void hmm_devmem_remove(struct hmm_devmem *devmem); + +/* + * hmm_devmem_page_set_drvdata - set per-page driver data field + * + * @page: pointer to struct page + * @data: driver data value to set + * + * Because page can not be on lru we have an unsigned long that driver can use + * to store a per page field. This just a simple helper to do that. + */ +static inline void hmm_devmem_page_set_drvdata(struct page *page, + unsigned long data) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + drvdata[1] = data; +} + +/* + * hmm_devmem_page_get_drvdata - get per page driver data field + * + * @page: pointer to struct page + * Return: driver data value + */ +static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + return drvdata[1]; +} +#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ + + /* Below are for HMM internal use only! Not to be used by device driver! */ void hmm_mm_destroy(struct mm_struct *mm); -- cgit From 858b54dabf4363daa3a97b9a722130a8e7cea8c9 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:02 -0700 Subject: mm/hmm/devmem: dummy HMM device for ZONE_DEVICE memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduce a dummy HMM device class so device driver can use it to create hmm_device for the sole purpose of registering device memory. It is useful to device driver that want to manage multiple physical device memory under same struct device umbrella. Link: http://lkml.kernel.org/r/20170817000548.32038-13-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 16f916b437cc..67a03b20a2db 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -72,11 +72,11 @@ #if IS_ENABLED(CONFIG_HMM) +#include #include #include #include - struct hmm; /* @@ -474,6 +474,26 @@ static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) return drvdata[1]; } + + +/* + * struct hmm_device - fake device to hang device memory onto + * + * @device: device struct + * @minor: device minor number + */ +struct hmm_device { + struct device device; + unsigned int minor; +}; + +/* + * A device driver that wants to handle multiple devices memory through a + * single fake device can use hmm_device to do so. This is purely a helper and + * it is not strictly needed, in order to make use of any HMM functionality. + */ +struct hmm_device *hmm_device_new(void *drvdata); +void hmm_device_put(struct hmm_device *hmm_device); #endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ -- cgit From 2916ecc0f9d435d849c98f4da50e453124c87531 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:06 -0700 Subject: mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new migration mode that allow to offload the copy to a device DMA engine. This changes the workflow of migration and not all address_space migratepage callback can support this. This is intended to be use by migrate_vma() which itself is use for thing like HMM (see include/linux/hmm.h). No additional per-filesystem migratepage testing is needed. I disables MIGRATE_SYNC_NO_COPY in all problematic migratepage() callback and i added comment in those to explain why (part of this patch). The commit message is unclear it should say that any callback that wish to support this new mode need to be aware of the difference in the migration flow from other mode. Some of these callbacks do extra locking while copying (aio, zsmalloc, balloon, ...) and for DMA to be effective you want to copy multiple pages in one DMA operations. But in the problematic case you can not easily hold the extra lock accross multiple call to this callback. Usual flow is: For each page { 1 - lock page 2 - call migratepage() callback 3 - (extra locking in some migratepage() callback) 4 - migrate page state (freeze refcount, update page cache, buffer head, ...) 5 - copy page 6 - (unlock any extra lock of migratepage() callback) 7 - return from migratepage() callback 8 - unlock page } The new mode MIGRATE_SYNC_NO_COPY: 1 - lock multiple pages For each page { 2 - call migratepage() callback 3 - abort in all problematic migratepage() callback 4 - migrate page state (freeze refcount, update page cache, buffer head, ...) } // finished all calls to migratepage() callback 5 - DMA copy multiple pages 6 - unlock all the pages To support MIGRATE_SYNC_NO_COPY in the problematic case we would need a new callback migratepages() (for instance) that deals with multiple pages in one transaction. Because the problematic cases are not important for current usage I did not wanted to complexify this patchset even more for no good reason. Link: http://lkml.kernel.org/r/20170817000548.32038-14-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 +++++ include/linux/migrate_mode.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ce15989521a1..7db4c812a2a6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -72,6 +72,7 @@ extern void putback_movable_page(struct page *page); extern int migrate_prep(void); extern int migrate_prep_local(void); +extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); @@ -92,6 +93,10 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } +static inline void migrate_page_states(struct page *newpage, struct page *page) +{ +} + static inline void migrate_page_copy(struct page *newpage, struct page *page) {} diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index ebf3d89a3919..bdf66af9b937 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -6,11 +6,16 @@ * on most operations but not ->writepage as the potential stall time * is too significant * MIGRATE_SYNC will block when migrating pages + * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages + * with the CPU. Instead, page copy happens outside the migratepage() + * callback and is likely using a DMA engine. See migrate_vma() and HMM + * (mm/hmm.c) for users of this mode. */ enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC, + MIGRATE_SYNC_NO_COPY, }; #endif /* MIGRATE_MODE_H_INCLUDED */ -- cgit From 8763cb45ab967a92a5ee49e9c544c0f0ea90e2d6 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:09 -0700 Subject: mm/migrate: new memory migration helper for use with device memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch add a new memory migration helpers, which migrate memory backing a range of virtual address of a process to different memory (which can be allocated through special allocator). It differs from numa migration by working on a range of virtual address and thus by doing migration in chunk that can be large enough to use DMA engine or special copy offloading engine. Expected users are any one with heterogeneous memory where different memory have different characteristics (latency, bandwidth, ...). As an example IBM platform with CAPI bus can make use of this feature to migrate between regular memory and CAPI device memory. New CPU architecture with a pool of high performance memory not manage as cache but presented as regular memory (while being faster and with lower latency than DDR) will also be prime user of this patch. Migration to private device memory will be useful for device that have large pool of such like GPU, NVidia plans to use HMM for that. Link: http://lkml.kernel.org/r/20170817000548.32038-15-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Evgeny Baskakov Signed-off-by: John Hubbard Signed-off-by: Mark Hairgrove Signed-off-by: Sherry Cheung Signed-off-by: Subhash Gutti Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 7db4c812a2a6..8f73cebfc3f5 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -156,4 +156,108 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, } #endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/ + +#ifdef CONFIG_MIGRATION + +#define MIGRATE_PFN_VALID (1UL << 0) +#define MIGRATE_PFN_MIGRATE (1UL << 1) +#define MIGRATE_PFN_LOCKED (1UL << 2) +#define MIGRATE_PFN_WRITE (1UL << 3) +#define MIGRATE_PFN_ERROR (1UL << 4) +#define MIGRATE_PFN_SHIFT 5 + +static inline struct page *migrate_pfn_to_page(unsigned long mpfn) +{ + if (!(mpfn & MIGRATE_PFN_VALID)) + return NULL; + return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT); +} + +static inline unsigned long migrate_pfn(unsigned long pfn) +{ + return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID; +} + +/* + * struct migrate_vma_ops - migrate operation callback + * + * @alloc_and_copy: alloc destination memory and copy source memory to it + * @finalize_and_map: allow caller to map the successfully migrated pages + * + * + * The alloc_and_copy() callback happens once all source pages have been locked, + * unmapped and checked (checked whether pinned or not). All pages that can be + * migrated will have an entry in the src array set with the pfn value of the + * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other + * flags might be set but should be ignored by the callback). + * + * The alloc_and_copy() callback can then allocate destination memory and copy + * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and + * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the + * callback must update each corresponding entry in the dst array with the pfn + * value of the destination page and with the MIGRATE_PFN_VALID and + * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages + * locked, via lock_page()). + * + * At this point the alloc_and_copy() callback is done and returns. + * + * Note that the callback does not have to migrate all the pages that are + * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration + * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also + * set in the src array entry). If the device driver cannot migrate a device + * page back to system memory, then it must set the corresponding dst array + * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to + * access any of the virtual addresses originally backed by this page. Because + * a SIGBUS is such a severe result for the userspace process, the device + * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an + * unrecoverable state. + * + * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES + * OR BAD THINGS WILL HAPPEN ! + * + * + * The finalize_and_map() callback happens after struct page migration from + * source to destination (destination struct pages are the struct pages for the + * memory allocated by the alloc_and_copy() callback). Migration can fail, and + * thus the finalize_and_map() allows the driver to inspect which pages were + * successfully migrated, and which were not. Successfully migrated pages will + * have the MIGRATE_PFN_MIGRATE flag set for their src array entry. + * + * It is safe to update device page table from within the finalize_and_map() + * callback because both destination and source page are still locked, and the + * mmap_sem is held in read mode (hence no one can unmap the range being + * migrated). + * + * Once callback is done cleaning up things and updating its page table (if it + * chose to do so, this is not an obligation) then it returns. At this point, + * the HMM core will finish up the final steps, and the migration is complete. + * + * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY + * ENTRIES OR BAD THINGS WILL HAPPEN ! + */ +struct migrate_vma_ops { + void (*alloc_and_copy)(struct vm_area_struct *vma, + const unsigned long *src, + unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); + void (*finalize_and_map)(struct vm_area_struct *vma, + const unsigned long *src, + const unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); +}; + +int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private); + +#endif /* CONFIG_MIGRATION */ + #endif /* _LINUX_MIGRATE_H */ -- cgit From a5430dda8a3a1cdd532e37270e6f36436241b6e7 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:17 -0700 Subject: mm/migrate: support un-addressable ZONE_DEVICE page in migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow to unmap and restore special swap entry of un-addressable ZONE_DEVICE memory. Link: http://lkml.kernel.org/r/20170817000548.32038-17-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Kirill A. Shutemov Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 8f73cebfc3f5..8dc8f0a3f1af 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -159,12 +159,18 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, #ifdef CONFIG_MIGRATION +/* + * Watch out for PAE architecture, which has an unsigned long, and might not + * have enough bits to store all physical address and flags. So far we have + * enough room for all our flags. + */ #define MIGRATE_PFN_VALID (1UL << 0) #define MIGRATE_PFN_MIGRATE (1UL << 1) #define MIGRATE_PFN_LOCKED (1UL << 2) #define MIGRATE_PFN_WRITE (1UL << 3) -#define MIGRATE_PFN_ERROR (1UL << 4) -#define MIGRATE_PFN_SHIFT 5 +#define MIGRATE_PFN_DEVICE (1UL << 4) +#define MIGRATE_PFN_ERROR (1UL << 5) +#define MIGRATE_PFN_SHIFT 6 static inline struct page *migrate_pfn_to_page(unsigned long mpfn) { -- cgit From 8315ada7f095bfa2cae0cd1e915b95bf6226897d Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:21 -0700 Subject: mm/migrate: allow migrate_vma() to alloc new page on empty entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows callers of migrate_vma() to allocate new page for empty CPU page table entry (pte_none or back by zero page). This is only for anonymous memory and it won't allow new page to be instanced if the userfaultfd is armed. This is useful to device driver that want to migrate a range of virtual address and would rather allocate new memory than having to fault later on. Link: http://lkml.kernel.org/r/20170817000548.32038-18-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Aneesh Kumar Cc: Balbir Singh Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 8dc8f0a3f1af..d4e6d12a0b40 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -218,6 +218,15 @@ static inline unsigned long migrate_pfn(unsigned long pfn) * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an * unrecoverable state. * + * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we + * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus + * allowing device driver to allocate device memory for those unback virtual + * address. For this the device driver simply have to allocate device memory + * and properly set the destination entry like for regular migration. Note that + * this can still fails and thus inside the device driver must check if the + * migration was successful for those entry inside the finalize_and_map() + * callback just like for regular migration. + * * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES * OR BAD THINGS WILL HAPPEN ! * -- cgit From df6ad69838fc9dcdbee0dcf2fc2c6f1113f8d609 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:24 -0700 Subject: mm/device-public-memory: device memory cache coherent with CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform with advance system bus (like CAPI or CCIX) allow device memory to be accessible from CPU in a cache coherent fashion. Add a new type of ZONE_DEVICE to represent such memory. The use case are the same as for the un-addressable device memory but without all the corners cases. Link: http://lkml.kernel.org/r/20170817000548.32038-19-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Aneesh Kumar Cc: Paul E. McKenney Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: Ross Zwisler Cc: Balbir Singh Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 4 ++-- include/linux/ioport.h | 1 + include/linux/memremap.h | 21 +++++++++++++++++++++ include/linux/mm.h | 20 ++++++++++++-------- 4 files changed, 36 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 67a03b20a2db..6d3b0b4fed4e 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -327,7 +327,7 @@ int hmm_vma_fault(struct vm_area_struct *vma, #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) struct hmm_devmem; struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, @@ -494,7 +494,7 @@ struct hmm_device { */ struct hmm_device *hmm_device_new(void *drvdata); void hmm_device_put(struct hmm_device *hmm_device); -#endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */ +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ /* Below are for HMM internal use only! Not to be used by device driver! */ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 3a4f69137bc2..f5cf32e80041 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -131,6 +131,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 8aa6b82679e2..f8ee1c73ad2d 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -57,10 +57,18 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. */ enum memory_type { MEMORY_DEVICE_HOST = 0, MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, }; /* @@ -92,6 +100,8 @@ enum memory_type { * The page_free() callback is called once the page refcount reaches 1 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. */ typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, unsigned long addr, @@ -134,6 +144,12 @@ static inline bool is_device_private_page(const struct page *page) return is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } + +static inline bool is_device_public_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; +} #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -157,6 +173,11 @@ static inline bool is_device_private_page(const struct page *page) { return false; } + +static inline bool is_device_public_page(const struct page *page) +{ + return false; +} #endif /** diff --git a/include/linux/mm.h b/include/linux/mm.h index eccdab4bb44a..de66a1127db4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -800,15 +800,16 @@ static inline bool is_zone_device_page(const struct page *page) } #endif -#ifdef CONFIG_DEVICE_PRIVATE -void put_zone_device_private_page(struct page *page); +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page); #else -static inline void put_zone_device_private_page(struct page *page) +static inline void put_zone_device_private_or_public_page(struct page *page) { } -#endif +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ static inline bool is_device_private_page(const struct page *page); +static inline bool is_device_public_page(const struct page *page); DECLARE_STATIC_KEY_FALSE(device_private_key); @@ -834,8 +835,9 @@ static inline void put_page(struct page *page) * include/linux/memremap.h and HMM for details. */ if (static_branch_unlikely(&device_private_key) && - unlikely(is_device_private_page(page))) { - put_zone_device_private_page(page); + unlikely(is_device_private_page(page) || + is_device_public_page(page))) { + put_zone_device_private_or_public_page(page); return; } @@ -1224,8 +1226,10 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ }; -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); +struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, bool with_public_device); +#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false) + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); -- cgit From d3df0a423397c9a1ae05c3857e8c04240dd85e68 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:28 -0700 Subject: mm/hmm: add new helper to hotplug CDM memory region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike unaddressable memory, coherent device memory has a real resource associated with it on the system (as CPU can address it). Add a new helper to hotplug such memory within the HMM framework. Link: http://lkml.kernel.org/r/20170817000548.32038-20-jglisse@redhat.com Signed-off-by: Jérôme Glisse Reviewed-by: Balbir Singh Cc: Aneesh Kumar Cc: Benjamin Herrenschmidt Cc: Dan Williams Cc: David Nellans Cc: Evgeny Baskakov Cc: Johannes Weiner Cc: John Hubbard Cc: Kirill A. Shutemov Cc: Mark Hairgrove Cc: Michal Hocko Cc: Paul E. McKenney Cc: Ross Zwisler Cc: Sherry Cheung Cc: Subhash Gutti Cc: Vladimir Davydov Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 6d3b0b4fed4e..8385e75356ca 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -443,6 +443,9 @@ struct hmm_devmem { struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct device *device, unsigned long size); +struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, + struct device *device, + struct resource *res); void hmm_devmem_remove(struct hmm_devmem *devmem); /* -- cgit From 6b368cd4a44ce95b33f1d31f2f932e6ae707f319 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:32 -0700 Subject: mm/hmm: avoid bloating arch that do not make use of HMM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves all new code including new page migration helper behind kernel Kconfig option so that there is no codee bloat for arch or user that do not want to use HMM or any of its associated features. arm allyesconfig (without all the patchset, then with and this patch): text data bss dec hex filename 83721896 46511131 27582964 157815991 96814b7 ../without/vmlinux 83722364 46511131 27582964 157816459 968168b vmlinux [jglisse@redhat.com: struct hmm is only use by HMM mirror functionality] Link: http://lkml.kernel.org/r/20170825213133.27286-1-jglisse@redhat.com [sfr@canb.auug.org.au: fix build (arm multi_v7_defconfig)] Link: http://lkml.kernel.org/r/20170828181849.323ab81b@canb.auug.org.au Link: http://lkml.kernel.org/r/20170818032858.7447-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Stephen Rothwell Cc: Dan Williams Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 9 ++++++--- include/linux/memremap.h | 22 +++++++--------------- include/linux/migrate.h | 13 +++++++++++++ include/linux/mm.h | 26 +++++++++++++++++--------- 4 files changed, 43 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 8385e75356ca..28e14345bd8d 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -501,18 +501,21 @@ void hmm_device_put(struct hmm_device *hmm_device); /* Below are for HMM internal use only! Not to be used by device driver! */ +#if IS_ENABLED(CONFIG_HMM_MIRROR) void hmm_mm_destroy(struct mm_struct *mm); static inline void hmm_mm_init(struct mm_struct *mm) { mm->hmm = NULL; } +#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -#else /* IS_ENABLED(CONFIG_HMM) */ -/* Below are for HMM internal use only! Not to be used by device driver! */ +#else /* IS_ENABLED(CONFIG_HMM) */ static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} - #endif /* IS_ENABLED(CONFIG_HMM) */ #endif /* LINUX_HMM_H */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index f8ee1c73ad2d..79f8ba7c3894 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -138,18 +138,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct dev_pagemap *find_dev_pagemap(resource_size_t phys); static inline bool is_zone_device_page(const struct page *page); - -static inline bool is_device_private_page(const struct page *page) -{ - return is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; -} - -static inline bool is_device_public_page(const struct page *page) -{ - return is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PUBLIC; -} #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -168,17 +156,21 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { return NULL; } +#endif +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) static inline bool is_device_private_page(const struct page *page) { - return false; + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; } static inline bool is_device_public_page(const struct page *page) { - return false; + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; } -#endif +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ /** * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn diff --git a/include/linux/migrate.h b/include/linux/migrate.h index d4e6d12a0b40..643c7ae7d7b4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -265,6 +265,7 @@ struct migrate_vma_ops { void *private); }; +#if defined(CONFIG_MIGRATE_VMA_HELPER) int migrate_vma(const struct migrate_vma_ops *ops, struct vm_area_struct *vma, unsigned long start, @@ -272,6 +273,18 @@ int migrate_vma(const struct migrate_vma_ops *ops, unsigned long *src, unsigned long *dst, void *private); +#else +static inline int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private) +{ + return -EINVAL; +} +#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */ #endif /* CONFIG_MIGRATION */ diff --git a/include/linux/mm.h b/include/linux/mm.h index de66a1127db4..5195e272fc4a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -800,18 +800,27 @@ static inline bool is_zone_device_page(const struct page *page) } #endif -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) void put_zone_device_private_or_public_page(struct page *page); -#else +DECLARE_STATIC_KEY_FALSE(device_private_key); +#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key) +static inline bool is_device_private_page(const struct page *page); +static inline bool is_device_public_page(const struct page *page); +#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ static inline void put_zone_device_private_or_public_page(struct page *page) { } +#define IS_HMM_ENABLED 0 +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} +static inline bool is_device_public_page(const struct page *page) +{ + return false; +} #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ -static inline bool is_device_private_page(const struct page *page); -static inline bool is_device_public_page(const struct page *page); - -DECLARE_STATIC_KEY_FALSE(device_private_key); static inline void get_page(struct page *page) { @@ -834,9 +843,8 @@ static inline void put_page(struct page *page) * free and we need to inform the device driver through callback. See * include/linux/memremap.h and HMM for details. */ - if (static_branch_unlikely(&device_private_key) && - unlikely(is_device_private_page(page) || - is_device_public_page(page))) { + if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) || + unlikely(is_device_public_page(page)))) { put_zone_device_private_or_public_page(page); return; } -- cgit From de540a9763efcd5b6339158ac2e5932fb3e691b9 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Fri, 8 Sep 2017 16:12:35 -0700 Subject: mm/hmm: fix build when HMM is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combinatorial Kconfig is painfull. Withi this patch all below combination build. 1) 2) CONFIG_HMM_MIRROR=y 3) CONFIG_DEVICE_PRIVATE=y 4) CONFIG_DEVICE_PUBLIC=y 5) CONFIG_HMM_MIRROR=y CONFIG_DEVICE_PUBLIC=y 6) CONFIG_HMM_MIRROR=y CONFIG_DEVICE_PRIVATE=y 7) CONFIG_DEVICE_PRIVATE=y CONFIG_DEVICE_PUBLIC=y 8) CONFIG_HMM_MIRROR=y CONFIG_DEVICE_PRIVATE=y CONFIG_DEVICE_PUBLIC=y Link: http://lkml.kernel.org/r/20170826002149.20919-1-jglisse@redhat.com Reported-by: Randy Dunlap Signed-off-by: Jérôme Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 28e14345bd8d..96e69979f84d 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -498,7 +498,7 @@ struct hmm_device { struct hmm_device *hmm_device_new(void *drvdata); void hmm_device_put(struct hmm_device *hmm_device); #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ - +#endif /* IS_ENABLED(CONFIG_HMM) */ /* Below are for HMM internal use only! Not to be used by device driver! */ #if IS_ENABLED(CONFIG_HMM_MIRROR) @@ -517,5 +517,4 @@ static inline void hmm_mm_init(struct mm_struct *mm) {} #else /* IS_ENABLED(CONFIG_HMM) */ static inline void hmm_mm_destroy(struct mm_struct *mm) {} static inline void hmm_mm_init(struct mm_struct *mm) {} -#endif /* IS_ENABLED(CONFIG_HMM) */ #endif /* LINUX_HMM_H */ -- cgit From 3a321d2a3dde812142e06ab5c2f062ed860182a5 Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Fri, 8 Sep 2017 16:12:48 -0700 Subject: mm: change the call sites of numa statistics items Patch series "Separate NUMA statistics from zone statistics", v2. Each page allocation updates a set of per-zone statistics with a call to zone_statistics(). As discussed in 2017 MM summit, these are a substantial source of overhead in the page allocator and are very rarely consumed. This significant overhead in cache bouncing caused by zone counters (NUMA associated counters) update in parallel in multi-threaded page allocation (pointed out by Dave Hansen). A link to the MM summit slides: http://people.netfilter.org/hawk/presentations/MM-summit2017/MM-summit2017-JesperBrouer.pdf To mitigate this overhead, this patchset separates NUMA statistics from zone statistics framework, and update NUMA counter threshold to a fixed size of MAX_U16 - 2, as a small threshold greatly increases the update frequency of the global counter from local per cpu counter (suggested by Ying Huang). The rationality is that these statistics counters don't need to be read often, unlike other VM counters, so it's not a problem to use a large threshold and make readers more expensive. With this patchset, we see 31.3% drop of CPU cycles(537-->369, see below) for per single page allocation and reclaim on Jesper's page_bench03 benchmark. Meanwhile, this patchset keeps the same style of virtual memory statistics with little end-user-visible effects (only move the numa stats to show behind zone page stats, see the first patch for details). I did an experiment of single page allocation and reclaim concurrently using Jesper's page_bench03 benchmark on a 2-Socket Broadwell-based server (88 processors with 126G memory) with different size of threshold of pcp counter. Benchmark provided by Jesper D Brouer(increase loop times to 10000000): https://github.com/netoptimizer/prototype-kernel/tree/master/kernel/mm/bench Threshold CPU cycles Throughput(88 threads) 32 799 241760478 64 640 301628829 125 537 358906028 <==> system by default 256 468 412397590 512 428 450550704 4096 399 482520943 20000 394 489009617 30000 395 488017817 65533 369(-31.3%) 521661345(+45.3%) <==> with this patchset N/A 342(-36.3%) 562900157(+56.8%) <==> disable zone_statistics This patch (of 3): In this patch, NUMA statistics is separated from zone statistics framework, all the call sites of NUMA stats are changed to use numa-stats-specific functions, it does not have any functionality change except that the number of NUMA stats is shown behind zone page stats when users *read* the zone info. E.g. cat /proc/zoneinfo ***Base*** ***With this patch*** nr_free_pages 3976 nr_free_pages 3976 nr_zone_inactive_anon 0 nr_zone_inactive_anon 0 nr_zone_active_anon 0 nr_zone_active_anon 0 nr_zone_inactive_file 0 nr_zone_inactive_file 0 nr_zone_active_file 0 nr_zone_active_file 0 nr_zone_unevictable 0 nr_zone_unevictable 0 nr_zone_write_pending 0 nr_zone_write_pending 0 nr_mlock 0 nr_mlock 0 nr_page_table_pages 0 nr_page_table_pages 0 nr_kernel_stack 0 nr_kernel_stack 0 nr_bounce 0 nr_bounce 0 nr_zspages 0 nr_zspages 0 numa_hit 0 *nr_free_cma 0* numa_miss 0 numa_hit 0 numa_foreign 0 numa_miss 0 numa_interleave 0 numa_foreign 0 numa_local 0 numa_interleave 0 numa_other 0 numa_local 0 *nr_free_cma 0* numa_other 0 ... ... vm stats threshold: 10 vm stats threshold: 10 ... ... The next patch updates the numa stats counter size and threshold. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1503568801-21305-2-git-send-email-kemi.wang@intel.com Signed-off-by: Kemi Wang Reported-by: Jesper Dangaard Brouer Acked-by: Mel Gorman Cc: Michal Hocko Cc: Johannes Weiner Cc: Christopher Lameter Cc: Dave Hansen Cc: Andi Kleen Cc: Ying Huang Cc: Aaron Lu Cc: Tim Chen Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 25 +++++++++++++++++-------- include/linux/vmstat.h | 29 ++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e7e92c8f4883..e65d91c02e30 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,20 @@ struct zone_padding { #define ZONE_PADDING(name) #endif +#ifdef CONFIG_NUMA +enum numa_stat_item { + NUMA_HIT, /* allocated in intended node */ + NUMA_MISS, /* allocated in non intended node */ + NUMA_FOREIGN, /* was intended here, hit elsewhere */ + NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ + NUMA_LOCAL, /* allocation from local node */ + NUMA_OTHER, /* allocation from other node */ + NR_VM_NUMA_STAT_ITEMS +}; +#else +#define NR_VM_NUMA_STAT_ITEMS 0 +#endif + enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, @@ -131,14 +145,6 @@ enum zone_stat_item { NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ -#endif -#ifdef CONFIG_NUMA - NUMA_HIT, /* allocated in intended node */ - NUMA_MISS, /* allocated in non intended node */ - NUMA_FOREIGN, /* was intended here, hit elsewhere */ - NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ - NUMA_LOCAL, /* allocation from local node */ - NUMA_OTHER, /* allocation from other node */ #endif NR_FREE_CMA_PAGES, NR_VM_ZONE_STAT_ITEMS }; @@ -276,6 +282,8 @@ struct per_cpu_pageset { struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; + s8 numa_stat_threshold; + s8 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; #endif #ifdef CONFIG_SMP s8 stat_threshold; @@ -496,6 +504,7 @@ struct zone { ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; + atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 97e11ab573f0..9ac82e29948f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -107,8 +107,33 @@ static inline void vm_events_fold_cpu(int cpu) * Zone and node-based page accounting with per cpu differentials. */ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; +extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA +static inline void zone_numa_state_add(long x, struct zone *zone, + enum numa_stat_item item) +{ + atomic_long_add(x, &zone->vm_numa_stat[item]); + atomic_long_add(x, &vm_numa_stat[item]); +} + +static inline unsigned long global_numa_state(enum numa_stat_item item) +{ + long x = atomic_long_read(&vm_numa_stat[item]); + + return x; +} + +static inline unsigned long zone_numa_state(struct zone *zone, + enum numa_stat_item item) +{ + long x = atomic_long_read(&zone->vm_numa_stat[item]); + + return x; +} +#endif /* CONFIG_NUMA */ + static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { @@ -194,8 +219,10 @@ static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, #ifdef CONFIG_NUMA +extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); extern unsigned long sum_zone_node_page_state(int node, - enum zone_stat_item item); + enum zone_stat_item item); +extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); #else -- cgit From 1d90ca897cb05cf38bd62f36756d219e02913b7d Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Fri, 8 Sep 2017 16:12:52 -0700 Subject: mm: update NUMA counter threshold size There is significant overhead in cache bouncing caused by zone counters (NUMA associated counters) update in parallel in multi-threaded page allocation (suggested by Dave Hansen). This patch updates NUMA counter threshold to a fixed size of MAX_U16 - 2, as a small threshold greatly increases the update frequency of the global counter from local per cpu counter(suggested by Ying Huang). The rationality is that these statistics counters don't affect the kernel's decision, unlike other VM counters, so it's not a problem to use a large threshold. With this patchset, we see 31.3% drop of CPU cycles(537-->369) for per single page allocation and reclaim on Jesper's page_bench03 benchmark. Benchmark provided by Jesper D Brouer(increase loop times to 10000000): https://github.com/netoptimizer/prototype-kernel/tree/master/kernel/mm/ bench Threshold CPU cycles Throughput(88 threads) 32 799 241760478 64 640 301628829 125 537 358906028 <==> system by default (base) 256 468 412397590 512 428 450550704 4096 399 482520943 20000 394 489009617 30000 395 488017817 65533 369(-31.3%) 521661345(+45.3%) <==> with this patchset N/A 342(-36.3%) 562900157(+56.8%) <==> disable zone_statistics Link: http://lkml.kernel.org/r/1503568801-21305-3-git-send-email-kemi.wang@intel.com Signed-off-by: Kemi Wang Reported-by: Jesper Dangaard Brouer Suggested-by: Dave Hansen Suggested-by: Ying Huang Acked-by: Mel Gorman Cc: Aaron Lu Cc: Andi Kleen Cc: Christopher Lameter Cc: Johannes Weiner Cc: Michal Hocko Cc: Tim Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e65d91c02e30..356a814e7c8e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -282,8 +282,7 @@ struct per_cpu_pageset { struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; - s8 numa_stat_threshold; - s8 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; + u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; #endif #ifdef CONFIG_SMP s8 stat_threshold; -- cgit From 638032224ed762a29baca1fc37f1168efc2554ae Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Fri, 8 Sep 2017 16:12:55 -0700 Subject: mm: consider the number in local CPUs when reading NUMA stats To avoid deviation, the per cpu number of NUMA stats in vm_numa_stat_diff[] is included when a user *reads* the NUMA stats. Since NUMA stats does not be read by users frequently, and kernel does not need it to make a decision, it will not be a problem to make the readers more expensive. Link: http://lkml.kernel.org/r/1503568801-21305-4-git-send-email-kemi.wang@intel.com Signed-off-by: Kemi Wang Reported-by: Jesper Dangaard Brouer Acked-by: Mel Gorman Cc: Aaron Lu Cc: Andi Kleen Cc: Christopher Lameter Cc: Dave Hansen Cc: Johannes Weiner Cc: Michal Hocko Cc: Tim Chen Cc: Ying Huang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9ac82e29948f..ade7cb5f1359 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -125,10 +125,14 @@ static inline unsigned long global_numa_state(enum numa_stat_item item) return x; } -static inline unsigned long zone_numa_state(struct zone *zone, +static inline unsigned long zone_numa_state_snapshot(struct zone *zone, enum numa_stat_item item) { long x = atomic_long_read(&zone->vm_numa_stat[item]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]; return x; } -- cgit From 855d97657d4d335970622f0d95ca4966d3f77ee7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 8 Sep 2017 16:13:38 -0700 Subject: proc: uninline proc_create() Save some code from ~320 invocations all clearing last argument. add/remove: 3/0 grow/shrink: 0/158 up/down: 45/-702 (-657) function old new delta proc_create - 17 +17 __ksymtab_proc_create - 16 +16 __kstrtab_proc_create - 12 +12 yam_init_driver 301 298 -3 ... cifs_proc_init 249 228 -21 via_fb_pci_probe 2304 2280 -24 Link: http://lkml.kernel.org/r/20170819094702.GA27864@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2d2bf592d9db..76124dd4e36d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,13 +28,7 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t, const struct file_operations *, void *); -static inline struct proc_dir_entry *proc_create( - const char *name, umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) -{ - return proc_create_data(name, mode, parent, proc_fops, NULL); -} - +struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops); extern void proc_set_size(struct proc_dir_entry *, loff_t); extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t); extern void *PDE_DATA(const struct inode *); -- cgit From 604df322363e5770735df85368f83cac4a955a24 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 8 Sep 2017 16:13:45 -0700 Subject: linux/kernel.h: move DIV_ROUND_DOWN_ULL() macro This macro is useful to avoid link error on 32-bit systems. We have the same definition in two drivers, so move it to include/linux/kernel.h While we are here, refactor DIV_ROUND_UP_ULL() by using DIV_ROUND_DOWN_ULL(). Link: http://lkml.kernel.org/r/1500945156-12907-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Mark Brown Cc: Cyrille Pitchen Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Liam Girdwood Cc: Boris Brezillon Cc: Marek Vasut Cc: Brian Norris Cc: Richard Weinberger Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6607225d0ea4..0ad4c3044cf9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -78,8 +78,11 @@ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP -#define DIV_ROUND_UP_ULL(ll,d) \ - ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) -- cgit From 3b3c4babd898715926d24ae10aa64778ace33aae Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 8 Sep 2017 16:13:48 -0700 Subject: lib/string.c: add multibyte memset functions Patch series "Multibyte memset variations", v4. A relatively common idiom we're missing is a function to fill an area of memory with a pattern which is larger than a single byte. I first noticed this with a zram patch which wanted to fill a page with an 'unsigned long' value. There turn out to be quite a few places in the kernel which can benefit from using an optimised function rather than a loop; sometimes text size, sometimes speed, and sometimes both. The optimised PowerPC version (not included here) improves performance by about 30% on POWER8 on just the raw memset_l(). Most of the extra lines of code come from the three testcases I added. This patch (of 8): memset16(), memset32() and memset64() are like memset(), but allow the caller to fill the destination with a value larger than a single byte. memset_l() and memset_p() allow the caller to use unsigned long and pointer values respectively. Link: http://lkml.kernel.org/r/20170720184539.31609-2-willy@infradead.org Signed-off-by: Matthew Wilcox Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: David Miller Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Minchan Kim Cc: Ralf Baechle Cc: Richard Henderson Cc: Russell King Cc: Sam Ravnborg Cc: Sergey Senozhatsky Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index a467e617eeb0..c8bdafffd2f0 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -99,6 +99,36 @@ extern __kernel_size_t strcspn(const char *,const char *); #ifndef __HAVE_ARCH_MEMSET extern void * memset(void *,int,__kernel_size_t); #endif + +#ifndef __HAVE_ARCH_MEMSET16 +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET32 +extern void *memset32(uint32_t *, uint32_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET64 +extern void *memset64(uint64_t *, uint64_t, __kernel_size_t); +#endif + +static inline void *memset_l(unsigned long *p, unsigned long v, + __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, v, n); + else + return memset64((uint64_t *)p, v, n); +} + +static inline void *memset_p(void **p, void *v, __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, (uintptr_t)v, n); + else + return memset64((uint64_t *)p, (uintptr_t)v, n); +} + #ifndef __HAVE_ARCH_MEMCPY extern void * memcpy(void *,const void *,__kernel_size_t); #endif -- cgit From ac036f9570a2d318b7d8dbbdbf0e269d7cc68cef Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 8 Sep 2017 16:14:15 -0700 Subject: vga: optimise console scrolling Where possible, call memset16(), memmove() or memcpy() instead of using open-coded loops. I don't like the calling convention that uses a byte count instead of a count of u16s, but it's a little late to change that. Reduces code size of fbcon.o by almost 400 bytes on my laptop build. [akpm@linux-foundation.org: fix build] Link: http://lkml.kernel.org/r/20170720184539.31609-9-willy@infradead.org Signed-off-by: Matthew Wilcox Cc: Ralf Baechle Cc: David Miller Cc: Sam Ravnborg Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Minchan Kim Cc: Richard Henderson Cc: Russell King Cc: Sergey Senozhatsky Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vt_buffer.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index f38c10ba3ff5..30b6e0d2a942 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -13,6 +13,7 @@ #ifndef _LINUX_VT_BUFFER_H_ #define _LINUX_VT_BUFFER_H_ +#include #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE) #include @@ -26,24 +27,33 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(c, s++); +#else + memset16(s, c, count / 2); +#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(scr_readw(s++), d++); +#else + memcpy(d, s, count); +#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) { +#ifdef VT_BUF_HAVE_RW if (d < s) scr_memcpyw(d, s, count); else { @@ -53,6 +63,9 @@ static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) while (count--) scr_writew(scr_readw(--s), --d); } +#else + memmove(d, s, count); +#endif } #endif -- cgit From 9b130ad5bb8255ee8534d92d67e12b2a4887eacb Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 8 Sep 2017 16:14:18 -0700 Subject: treewide: make "nr_cpu_ids" unsigned First, number of CPUs can't be negative number. Second, different signnnedness leads to suboptimal code in the following cases: 1) kmalloc(nr_cpu_ids * sizeof(X)); "int" has to be sign extended to size_t. 2) while (loff_t *pos < nr_cpu_ids) MOVSXD is 1 byte longed than the same MOV. Other cases exist as well. Basically compiler is told that nr_cpu_ids can't be negative which can't be deduced if it is "int". Code savings on allyesconfig kernel: -3KB add/remove: 0/0 grow/shrink: 25/264 up/down: 261/-3631 (-3370) function old new delta coretemp_cpu_online 450 512 +62 rcu_init_one 1234 1272 +38 pci_device_probe 374 399 +25 ... pgdat_reclaimable_pages 628 556 -72 select_fallback_rq 446 369 -77 task_numa_find_cpu 1923 1807 -116 Link: http://lkml.kernel.org/r/20170819114959.GA30580@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4bf4479a3a80..68c5a8290275 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -32,15 +32,15 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 -#define nr_cpu_ids 1 +#define nr_cpu_ids 1U #else -extern int nr_cpu_ids; +extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ -#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) +#define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif -- cgit From e9ef073a0796e46c24f037237291efe56fc28ad9 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 8 Sep 2017 16:14:29 -0700 Subject: include: warn for inconsistent endian config definition We have seen some generic code use config parameter CONFIG_CPU_BIG_ENDIAN to decide the endianness. Here are the few examples. include/asm-generic/qrwlock.h drivers/of/base.c drivers/of/fdt.c drivers/tty/serial/earlycon.c drivers/tty/serial/serial_core.c Display warning if CPU_BIG_ENDIAN is not defined on big endian architecture and also warn if it defined on little endian architectures. Here is our original discussion https://lkml.org/lkml/2017/5/24/620 Link: http://lkml.kernel.org/r/1499358861-179979-4-git-send-email-babu.moger@oracle.com Signed-off-by: Babu Moger Suggested-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Cc: "James E.J. Bottomley" Cc: Alexander Viro Cc: David S. Miller Cc: Greg KH Cc: Helge Deller Cc: Ingo Molnar Cc: Jonas Bonn Cc: Max Filippov Cc: Michael Ellerman (powerpc) Cc: Michal Simek Cc: Peter Zijlstra Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/byteorder/big_endian.h | 4 ++++ include/linux/byteorder/little_endian.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 392041475c72..ffd215988392 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -3,5 +3,9 @@ #include +#ifndef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN +#endif + #include #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 08057377aa23..ba910bb9aad0 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -3,5 +3,9 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, CONFIG_CPU_BIG_ENDIAN is set +#endif + #include #endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */ -- cgit From c32ee3d9abd284b4fcaacc250b101f93829c7bae Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 8 Sep 2017 16:14:33 -0700 Subject: bitops: avoid integer overflow in GENMASK(_ULL) GENMASK(_ULL) performs a left-shift of ~0UL(L), which technically results in an integer overflow. clang raises a warning if the overflow occurs in a preprocessor expression. Clear the low-order bits through a substraction instead of the left-shift to avoid the overflow. (akpm: no change in .text size in my testing) Link: http://lkml.kernel.org/r/20170803212020.24939-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a83c822c35c2..8fbe259b197c 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -19,10 +19,11 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #define GENMASK(h, l) \ - (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ - (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); -- cgit From cd9e61ed1eebbcd5dfad59475d41ec58d9b64b6a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:14:36 -0700 Subject: rbtree: cache leftmost node internally Patch series "rbtree: Cache leftmost node internally", v4. A series to extending rbtrees to internally cache the leftmost node such that we can have fast overlap check optimization for all interval tree users[1]. The benefits of this series are that: (i) Unify users that do internal leftmost node caching. (ii) Optimize all interval tree users. (iii) Convert at least two new users (epoll and procfs) to the new interface. This patch (of 16): Red-black tree semantics imply that nodes with smaller or greater (or equal for duplicates) keys always be to the left and right, respectively. For the kernel this is extremely evident when considering our rb_first() semantics. Enabling lookups for the smallest node in the tree in O(1) can save a good chunk of cycles in not having to walk down the tree each time. To this end there are a few core users that explicitly do this, such as the scheduler and rtmutexes. There is also the desire for interval trees to have this optimization allowing faster overlap checking. This patch introduces a new 'struct rb_root_cached' which is just the root with a cached pointer to the leftmost node. The reason why the regular rb_root was not extended instead of adding a new structure was that this allows the user to have the choice between memory footprint and actual tree performance. The new wrappers on top of the regular rb_root calls are: - rb_first_cached(cached_root) -- which is a fast replacement for rb_first. - rb_insert_color_cached(node, cached_root, new) - rb_erase_cached(node, cached_root) In addition, augmented cached interfaces are also added for basic insertion and deletion operations; which becomes important for the interval tree changes. With the exception of the inserts, which adds a bool for updating the new leftmost, the interfaces are kept the same. To this end, porting rb users to the cached version becomes really trivial, and keeping current rbtree semantics for users that don't care about the optimization requires zero overhead. Link: http://lkml.kernel.org/r/20170719014603.19029-2-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Reviewed-by: Jan Kara Acked-by: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 21 +++++++++++++++++++++ include/linux/rbtree_augmented.h | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e585018498d5..d574361943ea 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -44,10 +44,25 @@ struct rb_root { struct rb_node *rb_node; }; +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. + */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) @@ -69,6 +84,12 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +extern void rb_insert_color_cached(struct rb_node *, + struct rb_root_cached *, bool); +extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); +/* Same as rb_first(), but O(1) */ +#define rb_first_cached(root) (root)->rb_leftmost + /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 9702b6e183bc..6bfd2b581f75 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -41,7 +41,9 @@ struct rb_augment_callbacks { void (*rotate)(struct rb_node *old, struct rb_node *new); }; -extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, +extern void __rb_insert_augmented(struct rb_node *node, + struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. @@ -57,7 +59,16 @@ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - __rb_insert_augmented(node, root, augment->rotate); + __rb_insert_augmented(node, root, false, NULL, augment->rotate); +} + +static inline void +rb_insert_augmented_cached(struct rb_node *node, + struct rb_root_cached *root, bool newleft, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, &root->rb_root, + newleft, &root->rb_leftmost, augment->rotate); } #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ @@ -150,6 +161,7 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, + struct rb_node **leftmost, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; @@ -157,6 +169,9 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, struct rb_node *parent, *rebalance; unsigned long pc; + if (leftmost && node == *leftmost) + *leftmost = rb_next(node); + if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) @@ -256,9 +271,21 @@ static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + struct rb_node *rebalance = __rb_erase_augmented(node, root, + NULL, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } +static __always_inline void +rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, + augment); + if (rebalance) + __rb_erase_color(rebalance, &root->rb_root, augment->rotate); +} + #endif /* _LINUX_RBTREE_AUGMENTED_H */ -- cgit From a23ba907d5e65d6aeea3e59c82fda9cd206a7aad Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:15:01 -0700 Subject: locking/rtmutex: replace top-waiter and pi_waiters leftmost caching ... with the generic rbtree flavor instead. No changes in semantics whatsoever. Link: http://lkml.kernel.org/r/20170719014603.19029-10-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Acked-by: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 5 ++--- include/linux/rtmutex.h | 11 +++++------ include/linux/sched.h | 3 +-- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 0e849715e5be..3c07ace5b431 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,9 +175,8 @@ extern struct cred init_cred; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = RB_ROOT, \ - .pi_top_task = NULL, \ - .pi_waiters_leftmost = NULL, + .pi_waiters = RB_ROOT_CACHED, \ + .pi_top_task = NULL, #else # define INIT_RT_MUTEXES(tsk) #endif diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 44fd002f7cd5..53fcbe9de7fd 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -22,18 +22,17 @@ extern int max_lock_depth; /* for sysctl */ * The rt_mutex structure * * @wait_lock: spinlock to protect the structure - * @waiters: rbtree root to enqueue waiters in priority order - * @waiters_leftmost: top waiter + * @waiters: rbtree root to enqueue waiters in priority order; + * caches top-waiter (leftmost node). * @owner: the mutex owner */ struct rt_mutex { raw_spinlock_t wait_lock; - struct rb_root waiters; - struct rb_node *waiters_leftmost; + struct rb_root_cached waiters; struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - const char *name, *file; + const char *name, *file; int line; void *magic; #endif @@ -84,7 +83,7 @@ do { \ #define __RT_MUTEX_INITIALIZER(mutexname) \ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT \ + , .waiters = RB_ROOT_CACHED \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} diff --git a/include/linux/sched.h b/include/linux/sched.h index 68b38335d33c..92fb8dd5a9e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -812,8 +812,7 @@ struct task_struct { #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; + struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ -- cgit From f808c13fd3738948e10196496959871130612b61 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:15:08 -0700 Subject: lib/interval_tree: fast overlap detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow interval trees to quickly check for overlaps to avoid unnecesary tree lookups in interval_tree_iter_first(). As of this patch, all interval tree flavors will require using a 'rb_root_cached' such that we can have the leftmost node easily available. While most users will make use of this feature, those with special functions (in addition to the generic insert, delete, search calls) will avoid using the cached option as they can do funky things with insertions -- for example, vma_interval_tree_insert_after(). [jglisse@redhat.com: fix deadlock from typo vm_lock_anon_vma()] Link: http://lkml.kernel.org/r/20170808225719.20723-1-jglisse@redhat.com Link: http://lkml.kernel.org/r/20170719014603.19029-12-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Signed-off-by: Jérôme Glisse Acked-by: Christian König Acked-by: Peter Zijlstra (Intel) Acked-by: Doug Ledford Acked-by: Michael S. Tsirkin Cc: David Airlie Cc: Jason Wang Cc: Christian Benvenuti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/drm/drm_mm.h | 2 +- include/linux/fs.h | 4 +-- include/linux/interval_tree.h | 8 +++--- include/linux/interval_tree_generic.h | 46 +++++++++++++++++++++++++++-------- include/linux/mm.h | 17 +++++++------ include/linux/rmap.h | 4 ++- include/rdma/ib_umem_odp.h | 11 ++++++--- include/rdma/ib_verbs.h | 2 +- 8 files changed, 64 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 49b292e98fec..8d10fc97801c 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -172,7 +172,7 @@ struct drm_mm { * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */ - struct rb_root interval_tree; + struct rb_root_cached interval_tree; struct rb_root holes_size; struct rb_root holes_addr; diff --git a/include/linux/fs.h b/include/linux/fs.h index 509434aaf5a4..6111976848ff 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,7 +392,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root i_mmap; /* tree of private and shared mappings */ + struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -487,7 +487,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap); + return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 724556aa3c95..202ee1283f4b 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -11,13 +11,15 @@ struct interval_tree_node { }; extern void -interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); +interval_tree_insert(struct interval_tree_node *node, + struct rb_root_cached *root); extern void -interval_tree_remove(struct interval_tree_node *node, struct rb_root *root); +interval_tree_remove(struct interval_tree_node *node, + struct rb_root_cached *root); extern struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, +interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct interval_tree_node * diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 58370e1862ad..f096423c8cbd 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -65,11 +65,13 @@ RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ \ /* Insert / remove interval nodes from the tree */ \ \ -ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ ITTYPE start = ITSTART(node), last = ITLAST(node); \ ITSTRUCT *parent; \ + bool leftmost = true; \ \ while (*link) { \ rb_parent = *link; \ @@ -78,18 +80,22 @@ ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ parent->ITSUBTREE = last; \ if (start < ITSTART(parent)) \ link = &parent->ITRB.rb_left; \ - else \ + else { \ link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ } \ \ node->ITSUBTREE = last; \ rb_link_node(&node->ITRB, rb_parent, link); \ - rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ } \ \ -ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ } \ \ /* \ @@ -140,15 +146,35 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ } \ \ ITSTATIC ITSTRUCT * \ -ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ { \ - ITSTRUCT *node; \ + ITSTRUCT *node, *leftmost; \ \ - if (!root->rb_node) \ + if (!root->rb_root.rb_node) \ return NULL; \ - node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). \ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ if (node->ITSUBTREE < start) \ return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ return ITPREFIX ## _subtree_search(node, start, last); \ } \ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5195e272fc4a..f8c10d336e42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2034,13 +2034,13 @@ extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, - struct rb_root *root); -struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + struct rb_root_cached *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, unsigned long start, unsigned long last); @@ -2050,11 +2050,12 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, vma; vma = vma_interval_tree_iter_next(vma, start, last)) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root *root); + struct rb_root_cached *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root); -struct anon_vma_chain *anon_vma_interval_tree_iter_first( - struct rb_root *root, unsigned long start, unsigned long last); + struct rb_root_cached *root); +struct anon_vma_chain * +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, + unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); #ifdef CONFIG_DEBUG_VM_RB diff --git a/include/linux/rmap.h b/include/linux/rmap.h index f8ca2e74b819..733d3d8181e2 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -55,7 +55,9 @@ struct anon_vma { * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct rb_root rb_root; /* Interval tree of private "related" vmas */ + + /* Interval tree of private "related" vmas */ + struct rb_root_cached rb_root; }; /* diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index fb67554aabd6..5eb7f5bc8248 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,22 +111,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); +void rbt_ib_umem_insert(struct umem_odp_node *node, + struct rb_root_cached *root); +void rbt_ib_umem_remove(struct umem_odp_node *node, + struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of * the return values of the functions called. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 end, umem_call_back cb, void *cookie); /* * Find first region intersecting with address range. * Return NULL if not found */ -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6df68048517..bdb1279a415b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1457,7 +1457,7 @@ struct ib_ucontext { struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root umem_tree; + struct rb_root_cached umem_tree; /* * Protects .umem_rbroot and tree, as well as odp_mrs_count and * mmu notifiers registration. -- cgit From f2686bb48618718c7141f117e2d607594e959bfc Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:15:12 -0700 Subject: lib/interval-tree: correct comment wrt generic flavor interval_tree.h _is_ the generic flavor. Link: http://lkml.kernel.org/r/20170719014603.19029-13-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Acked-by: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interval_tree_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index f096423c8cbd..1f97ce26cccc 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -33,7 +33,7 @@ * ITSTATIC: 'static' or empty * ITPREFIX: prefix to use for the inline tree definitions * - * Note - before using this, please consider if non-generic version + * Note - before using this, please consider if generic version * (interval_tree.h) would work for you... */ -- cgit From 60ef690018b262ddcd0d51edf10e40710deb9c9f Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 8 Sep 2017 16:15:41 -0700 Subject: bitmap: introduce BITMAP_FROM_U64() The macro is the compile-time analogue of bitmap_from_u64() with the same purpose: convert the 64-bit number to the properly ordered pair of 32-bit parts, suitable for filling the bitmap in 32-bit BE environment. Use it to make test_bitmap_parselist() correct for 32-bit BE ABIs. Tested on BE mips/qemu. [akpm@linux-foundation.org: tweak code comment] Link: http://lkml.kernel.org/r/20170810172916.24144-1-ynorov@caviumnetworks.com Signed-off-by: Yury Norov Cc: Noam Camus Cc: Rasmus Villemoes Cc: Matthew Wilcox Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5797ca6fdfe2..700cf5f67118 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -360,6 +360,38 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +/* + * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. + * + * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit + * integers in 32-bit environment, and 64-bit integers in 64-bit one. + * + * There are four combinations of endianness and length of the word in linux + * ABIs: LE64, BE64, LE32 and BE32. + * + * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in + * bitmaps and therefore don't require any special handling. + * + * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory + * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the + * other hand is represented as an array of 32-bit words and the position of + * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that + * word. For example, bit #42 is located at 10th position of 2nd word. + * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit + * values in memory as it usually does. But for BE we need to swap hi and lo + * words manually. + * + * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and + * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps + * hi and lo words, as is expected by bitmap. + */ +#if __BITS_PER_LONG == 64 +#define BITMAP_FROM_U64(n) (n) +#else +#define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ + ((unsigned long) ((u64)(n) >> 32)) +#endif + /* * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap -- cgit From 895a60728f83684e738cce34d7d4e64631505ae4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 8 Sep 2017 16:15:45 -0700 Subject: lib/rhashtable: fix comment on locks_mul default value As of commit 4cf0b354d92 ("rhashtable: avoid large lock-array allocations"), the default value for the locks multiplier was reduced from 128 to 32. Update the header file to reflect this. Link: http://lkml.kernel.org/r/20170815215401.30745-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Florian Westphal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rhashtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7d56a7ea2b2e..361c08e35dbc 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -127,7 +127,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) -- cgit From 42f46148217865a545e129612075f3d828a2c4e4 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 8 Sep 2017 16:16:24 -0700 Subject: autofs: fix AT_NO_AUTOMOUNT not being honored The fstatat(2) and statx() calls can pass the flag AT_NO_AUTOMOUNT which is meant to clear the LOOKUP_AUTOMOUNT flag and prevent triggering of an automount by the call. But this flag is unconditionally cleared for all stat family system calls except statx(). stat family system calls have always triggered mount requests for the negative dentry case in follow_automount() which is intended but prevents the fstatat(2) and statx() AT_NO_AUTOMOUNT case from being handled. In order to handle the AT_NO_AUTOMOUNT for both system calls the negative dentry case in follow_automount() needs to be changed to return ENOENT when the LOOKUP_AUTOMOUNT flag is clear (and the other required flags are clear). AFAICT this change doesn't have any noticable side effects and may, in some use cases (although I didn't see it in testing) prevent unnecessary callbacks to the automount daemon. It's also possible that a stat family call has been made with a path that is in the process of being mounted by some other process. But stat family calls should return the automount state of the path as it is "now" so it shouldn't wait for mount completion. This is the same semantic as the positive dentry case already handled. Link: http://lkml.kernel.org/r/150216641255.11652.4204561328197919771.stgit@pluto.themaw.net Fixes: deccf497d804a4c5fca ("Make stat/lstat/fstatat pass AT_NO_AUTOMOUNT to vfs_statx()") Signed-off-by: Ian Kent Cc: David Howells Cc: Colin Walters Cc: Ondrej Holy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6111976848ff..2d0e6748e46e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3043,8 +3043,7 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { -- cgit From 3dd8f7c3b78b9556582fd64bf5c9986723f9dca1 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 8 Sep 2017 16:16:30 -0700 Subject: autofs: make dev ioctl version and ismountpoint user accessible Some of the autofs miscellaneous device ioctls need to be accessable to user space applications without CAP_SYS_ADMIN to get information about autofs mounts. Link: http://lkml.kernel.org/r/150216642517.11652.2338933266137331637.stgit@pluto.themaw.net Signed-off-by: Ian Kent Cc: Colin Walters Cc: Ondrej Holy Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_dev-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 744b3d060968..5558db8e6646 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -16,7 +16,7 @@ #define AUTOFS_DEVICE_NAME "autofs" #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 -#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 1 #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) -- cgit From 1f28c5d055032e7e8ee5e48198dca7e125d0eec6 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Fri, 8 Sep 2017 16:16:34 -0700 Subject: autofs: remove unused AUTOFS_IOC_EXPIRE_DIRECT/INDIRECT These are not used by either kernel or userspace, although AUTOFS_IOC_EXPIRE_DIRECT once seems to have been used by userspace in around 2006-2008, which was technically just an alias of the existing ioctl AUTOFS_IOC_EXPIRE_MULTI. ioctls for autofs are already complicated enough that they could be removed unless these are staying here to be able to compile userspace code of certain period of time from a decade ago. Edit: raven@themaw.net Yes, this is indeed very old and anything that still uses must be updated becuase it will be using broken functionality. End edit: raven@themaw.net Link: http://lkml.kernel.org/r/150285067347.4670.11494624644273072003.stgit@pluto.themaw.net Signed-off-by: Tomohiro Kusumi Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_fs4.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 7c6da423d54e..9453e9a07c9d 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -155,8 +155,6 @@ enum { }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int) -#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI -#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int) #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int) -- cgit From c1f3fa2a4fde2818623b42e3f749bd478be5dec7 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 8 Sep 2017 16:17:08 -0700 Subject: kmod: split off umh headers into its own file In the future usermode helper users do not need to carry in all the of kmod headers declarations. Since kmod.h still includes umh.h this change has no functional changes, each umh user can be cleaned up separately later and with time. Link: http://lkml.kernel.org/r/20170810180618.22457-4-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Cc: Kees Cook Cc: Dmitry Torokhov Cc: Jessica Yu Cc: Rusty Russell Cc: Michal Marek Cc: Petr Mladek Cc: Miroslav Benes Cc: Josh Poimboeuf Cc: Guenter Roeck Cc: "Eric W. Biederman" Cc: Matt Redfearn Cc: Dan Carpenter Cc: Colin Ian King Cc: Daniel Mentz Cc: David Binderman Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 60 +-------------------------------------------- include/linux/umh.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 59 deletions(-) create mode 100644 include/linux/umh.h (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 655082c88fd9..40c89ad4bea6 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -19,6 +19,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -44,63 +45,4 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #define try_then_request_module(x, mod...) (x) #endif - -struct cred; -struct file; - -#define UMH_NO_WAIT 0 /* don't wait at all */ -#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ -#define UMH_WAIT_PROC 2 /* wait for the process to complete */ -#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ - -struct subprocess_info { - struct work_struct work; - struct completion *complete; - const char *path; - char **argv; - char **envp; - int wait; - int retval; - int (*init)(struct subprocess_info *info, struct cred *new); - void (*cleanup)(struct subprocess_info *info); - void *data; -} __randomize_layout; - -extern int -call_usermodehelper(const char *path, char **argv, char **envp, int wait); - -extern struct subprocess_info * -call_usermodehelper_setup(const char *path, char **argv, char **envp, - gfp_t gfp_mask, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); - -extern int -call_usermodehelper_exec(struct subprocess_info *info, int wait); - -extern struct ctl_table usermodehelper_table[]; - -enum umh_disable_depth { - UMH_ENABLED = 0, - UMH_FREEZING, - UMH_DISABLED, -}; - -extern int __usermodehelper_disable(enum umh_disable_depth depth); -extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); - -static inline int usermodehelper_disable(void) -{ - return __usermodehelper_disable(UMH_DISABLED); -} - -static inline void usermodehelper_enable(void) -{ - __usermodehelper_set_disable_depth(UMH_ENABLED); -} - -extern int usermodehelper_read_trylock(void); -extern long usermodehelper_read_lock_wait(long timeout); -extern void usermodehelper_read_unlock(void); - #endif /* __LINUX_KMOD_H__ */ diff --git a/include/linux/umh.h b/include/linux/umh.h new file mode 100644 index 000000000000..244aff638220 --- /dev/null +++ b/include/linux/umh.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_UMH_H__ +#define __LINUX_UMH_H__ + +#include +#include +#include +#include +#include +#include + +struct cred; +struct file; + +#define UMH_NO_WAIT 0 /* don't wait at all */ +#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 2 /* wait for the process to complete */ +#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ + +struct subprocess_info { + struct work_struct work; + struct completion *complete; + const char *path; + char **argv; + char **envp; + int wait; + int retval; + int (*init)(struct subprocess_info *info, struct cred *new); + void (*cleanup)(struct subprocess_info *info); + void *data; +} __randomize_layout; + +extern int +call_usermodehelper(const char *path, char **argv, char **envp, int wait); + +extern struct subprocess_info * +call_usermodehelper_setup(const char *path, char **argv, char **envp, + gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); + +extern struct ctl_table usermodehelper_table[]; + +enum umh_disable_depth { + UMH_ENABLED = 0, + UMH_FREEZING, + UMH_DISABLED, +}; + +extern int __usermodehelper_disable(enum umh_disable_depth depth); +extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); + +static inline int usermodehelper_disable(void) +{ + return __usermodehelper_disable(UMH_DISABLED); +} + +static inline void usermodehelper_enable(void) +{ + __usermodehelper_set_disable_depth(UMH_ENABLED); +} + +extern int usermodehelper_read_trylock(void); +extern long usermodehelper_read_lock_wait(long timeout); +extern void usermodehelper_read_unlock(void); + +#endif /* __LINUX_UMH_H__ */ -- cgit From f22ef333c32cc683922d7e3361a83ebc31b2ac6d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 8 Sep 2017 16:17:15 -0700 Subject: cpumask: make cpumask_next() out-of-line Every for_each_XXX_cpu() invocation calls cpumask_next() which is an inline function: static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); } However! find_next_bit() is regular out-of-line function which means "nr_cpu_ids" load and increment happen at the caller resulting in a lot of bloat x86_64 defconfig: add/remove: 3/0 grow/shrink: 8/373 up/down: 155/-5668 (-5513) x86_64 allyesconfig-ish: add/remove: 3/1 grow/shrink: 57/634 up/down: 3515/-28177 (-24662) !!! Some archs redefine find_next_bit() but it is OK: m68k inline but SMP is not supported arm out-of-line unicore32 out-of-line Function call will happen anyway, so move load and increment into callee. Link: http://lkml.kernel.org/r/20170824230010.GA1593@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 68c5a8290275..cd415b733c2a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -178,20 +178,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } -/** - * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) - * @srcp: the cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set. - */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); -} +unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask -- cgit From a2d818030135c293f878fbb772cf40e7a14c5acc Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 8 Sep 2017 16:17:19 -0700 Subject: drivers/pps: aesthetic tweaks to PPS-related content Collection of aesthetic adjustments to various PPS-related files, directories and Documentation, some quite minor just for the sake of consistency, including: * Updated example of pps device tree node (courtesy Rodolfo G.) * "PPS-API" -> "PPS API" * "pps_source_info_s" -> "pps_source_info" * "ktimer driver" -> "pps-ktimer driver" * "ppstest /dev/pps0" -> "ppstest /dev/pps1" to match example * Add missing PPS-related entries to MAINTAINERS file * Other trivialities Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708261048220.8106@localhost.localdomain Signed-off-by: Robert P. J. Day Acked-by: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pps-gpio.h | 2 +- include/linux/pps_kernel.h | 16 +++++++--------- include/uapi/linux/pps.h | 4 ++-- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h index 0035abe41b9a..56f35dd3d01d 100644 --- a/include/linux/pps-gpio.h +++ b/include/linux/pps-gpio.h @@ -29,4 +29,4 @@ struct pps_gpio_platform_data { const char *gpio_label; }; -#endif +#endif /* _PPS_GPIO_H */ diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 35ac903956c7..80a980cc8d95 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -22,7 +22,6 @@ #define LINUX_PPS_KERNEL_H #include - #include #include #include @@ -35,9 +34,9 @@ struct pps_device; /* The specific PPS source info */ struct pps_source_info { - char name[PPS_MAX_NAME_LEN]; /* simbolic name */ + char name[PPS_MAX_NAME_LEN]; /* symbolic name */ char path[PPS_MAX_NAME_LEN]; /* path of connected device */ - int mode; /* PPS's allowed mode */ + int mode; /* PPS allowed mode */ void (*echo)(struct pps_device *pps, int event, void *data); /* PPS echo function */ @@ -57,10 +56,10 @@ struct pps_event_time { struct pps_device { struct pps_source_info info; /* PSS source info */ - struct pps_kparams params; /* PPS's current params */ + struct pps_kparams params; /* PPS current params */ - __u32 assert_sequence; /* PPS' assert event seq # */ - __u32 clear_sequence; /* PPS' clear event seq # */ + __u32 assert_sequence; /* PPS assert event seq # */ + __u32 clear_sequence; /* PPS clear event seq # */ struct pps_ktime assert_tu; struct pps_ktime clear_tu; int current_mode; /* PPS mode at event time */ @@ -69,7 +68,7 @@ struct pps_device { wait_queue_head_t queue; /* PPS event queue */ unsigned int id; /* PPS source unique ID */ - void const *lookup_cookie; /* pps_lookup_dev only */ + void const *lookup_cookie; /* For pps_lookup_dev() only */ struct cdev cdev; struct device *dev; struct fasync_struct *async_queue; /* fasync method */ @@ -101,7 +100,7 @@ extern struct pps_device *pps_register_source( extern void pps_unregister_source(struct pps_device *pps); extern void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, void *data); -/* Look up a pps device by magic cookie */ +/* Look up a pps_device by magic cookie */ struct pps_device *pps_lookup_dev(void const *cookie); static inline void timespec_to_pps_ktime(struct pps_ktime *kt, @@ -132,4 +131,3 @@ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta } #endif /* LINUX_PPS_KERNEL_H */ - diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h index c1cb3825a8bc..c29d6b791c08 100644 --- a/include/uapi/linux/pps.h +++ b/include/uapi/linux/pps.h @@ -95,8 +95,8 @@ struct pps_kparams { #define PPS_CAPTURECLEAR 0x02 /* capture clear events */ #define PPS_CAPTUREBOTH 0x03 /* capture assert and clear events */ -#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert ev. */ -#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear ev. */ +#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert event */ +#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear event */ #define PPS_CANWAIT 0x100 /* can we wait for an event? */ #define PPS_CANPOLL 0x200 /* bit reserved for future use */ -- cgit From a2e0602c36ed9fe042714694dd5a889ecd8cb556 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 8 Sep 2017 16:17:38 -0700 Subject: ipc: convert ipc_namespace.count from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Link: http://lkml.kernel.org/r/1499417992-3238-2-git-send-email-elena.reshetova@intel.com Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: "Eric W. Biederman" Cc: Ingo Molnar Cc: Alexey Dobriyan Cc: Serge Hallyn Cc: Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 65327ee0936b..e81445cc7c57 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -7,6 +7,7 @@ #include #include #include +#include struct user_namespace; @@ -19,7 +20,7 @@ struct ipc_ids { }; struct ipc_namespace { - atomic_t count; + refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -118,7 +119,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->count); return ns; } -- cgit From 9405c03ee778cd3e353e55fff6e16dfdd9609c02 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 8 Sep 2017 16:17:45 -0700 Subject: ipc: convert kern_ipc_perm.refcount from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Link: http://lkml.kernel.org/r/1499417992-3238-4-git-send-email-elena.reshetova@intel.com Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: "Eric W. Biederman" Cc: Ingo Molnar Cc: Alexey Dobriyan Cc: Serge Hallyn Cc: Cc: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index fadd579d577d..ae68980e9d48 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -4,6 +4,7 @@ #include #include #include +#include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ @@ -22,7 +23,7 @@ struct kern_ipc_perm { void *security; struct rcu_head rcu; - atomic_t refcount; + refcount_t refcount; } ____cacheline_aligned_in_smp __randomize_layout; #endif /* _LINUX_IPC_H */ -- cgit From 0cfb6aee70bddbef6ec796b255f588ce0e126766 Mon Sep 17 00:00:00 2001 From: Guillaume Knispel Date: Fri, 8 Sep 2017 16:17:55 -0700 Subject: ipc: optimize semget/shmget/msgget for lots of keys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ipc_findkey() used to scan all objects to look for the wanted key. This is slow when using a high number of keys. This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n). This change gives a 865% improvement of benchmark reaim.jobs_per_min on a 56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1] Other (more micro) benchmark results, by the author: On an i5 laptop, the following loop executed right after a reboot took, without and with this change: for (int i = 0, k=0x424242; i < KEYS; ++i) semget(k++, 1, IPC_CREAT | 0600); total total max single max single KEYS without with call without call with 1 3.5 4.9 µs 3.5 4.9 10 7.6 8.6 µs 3.7 4.7 32 16.2 15.9 µs 4.3 5.3 100 72.9 41.8 µs 3.7 4.7 1000 5,630.0 502.0 µs * * 10000 1,340,000.0 7,240.0 µs * * 31900 17,600,000.0 22,200.0 µs * * *: unreliable measure: high variance The duration for a lookup-only usage was obtained by the same loop once the keys are present: total total max single max single KEYS without with call without call with 1 2.1 2.5 µs 2.1 2.5 10 4.5 4.8 µs 2.2 2.3 32 13.0 10.8 µs 2.3 2.8 100 82.9 25.1 µs * 2.3 1000 5,780.0 217.0 µs * * 10000 1,470,000.0 2,520.0 µs * * 31900 17,400,000.0 7,810.0 µs * * Finally, executing each semget() in a new process gave, when still summing only the durations of these syscalls: creation: total total KEYS without with 1 3.7 5.0 µs 10 32.9 36.7 µs 32 125.0 109.0 µs 100 523.0 353.0 µs 1000 20,300.0 3,280.0 µs 10000 2,470,000.0 46,700.0 µs 31900 27,800,000.0 219,000.0 µs lookup-only: total total KEYS without with 1 2.5 2.7 µs 10 25.4 24.4 µs 32 106.0 72.6 µs 100 591.0 352.0 µs 1000 22,400.0 2,250.0 µs 10000 2,510,000.0 25,700.0 µs 31900 28,200,000.0 115,000.0 µs [1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix Signed-off-by: Guillaume Knispel Reviewed-by: Marc Pardo Cc: Davidlohr Bueso Cc: Kees Cook Cc: Manfred Spraul Cc: Alexey Dobriyan Cc: "Eric W. Biederman" Cc: "Peter Zijlstra (Intel)" Cc: Ingo Molnar Cc: Sebastian Andrzej Siewior Cc: Serge Hallyn Cc: Andrey Vagin Cc: Guillaume Knispel Cc: Marc Pardo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 3 +++ include/linux/ipc_namespace.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index ae68980e9d48..92a2ccff80c5 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -22,6 +23,8 @@ struct kern_ipc_perm { unsigned long seq; void *security; + struct rhash_head khtnode; + struct rcu_head rcu; refcount_t refcount; } ____cacheline_aligned_in_smp __randomize_layout; diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e81445cc7c57..83f0bf7a587d 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -8,15 +8,18 @@ #include #include #include +#include struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; + bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; + struct rhashtable key_ht; }; struct ipc_namespace { -- cgit From 5a67da2a71c64daeb456f6f3e87b5c7cecdc5ffa Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 8 Sep 2017 14:00:49 -0700 Subject: bpf: add support for sockmap detach programs The bpf map sockmap supports adding programs via attach commands. This patch adds the detach command to keep the API symmetric and allow users to remove previously added programs. Otherwise the user would have to delete the map and re-add it to get in this state. This also adds a series of additional tests to capture detach operation and also attaching/detaching invalid prog types. API note: socks will run (or not run) programs depending on the state of the map at the time the sock is added. We do not for example walk the map and remove programs from previously attached socks. Acked-by: Daniel Borkmann Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2cb1b5c094e..8390859e79e7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -385,16 +385,16 @@ static inline void __dev_map_flush(struct bpf_map *map) #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); -int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); +int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { return NULL; } -static inline int sock_map_attach_prog(struct bpf_map *map, - struct bpf_prog *prog, - u32 type) +static inline int sock_map_prog(struct bpf_map *map, + struct bpf_prog *prog, + u32 type) { return -EOPNOTSUPP; } -- cgit From 9beb8bedb05c5f3a353dba62b8fa7cbbb9ec685e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 9 Sep 2017 01:40:35 +0200 Subject: bpf: make error reporting in bpf_warn_invalid_xdp_action more clear Differ between illegal XDP action code and just driver unsupported one to provide better feedback when we throw a one-time warning here. Reason is that with 814abfabef3c ("xdp: add bpf_redirect helper function") not all drivers support the new XDP return code yet and thus they will fall into their 'default' case when checking for return codes after program return, which then triggers a bpf_warn_invalid_xdp_action() stating that the return code is illegal, but from XDP perspective it's not. I decided not to place something like a XDP_ACT_MAX define into uapi i) given we don't have this either for all other program types, ii) future action codes could have further encoding there, which would render such define unsuitable and we wouldn't be able to rip it out again, and iii) we rarely add new action codes. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba848b761cfb..43ab5c402f98 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -766,8 +766,8 @@ struct bpf_sock { /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will result - * in packet drop. + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). */ enum xdp_action { XDP_ABORTED = 0, -- cgit From c3ca015fab6df124c933b91902f3f2a3473f9da5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 31 Aug 2017 21:47:43 -0400 Subject: dax: remove the pmem_dax_ops->flush abstraction Commit abebfbe2f731 ("dm: add ->flush() dax operation support") is buggy. A DM device may be composed of multiple underlying devices and all of them need to be flushed. That commit just routes the flush request to the first device and ignores the other devices. It could be fixed by adding more complex logic to the device mapper. But there is only one implementation of the method pmem_dax_ops->flush - that is pmem_dax_flush() - and it calls arch_wb_cache_pmem(). Consequently, we don't need the pmem_dax_ops->flush abstraction at all, we can call arch_wb_cache_pmem() directly from dax_flush() because dax_dev->ops->flush can't ever reach anything different from arch_wb_cache_pmem(). It should be also pointed out that for some uses of persistent memory it is needed to flush only a very small amount of data (such as 1 cacheline), and it would be overkill if we go through that device mapper machinery for a single flushed cache line. Fix this by removing the pmem_dax_ops->flush abstraction and call arch_wb_cache_pmem() directly from dax_flush(). Also, remove the device mapper code that forwards the flushes. Fixes: abebfbe2f731 ("dm: add ->flush() dax operation support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Reviewed-by: Dan Williams Signed-off-by: Mike Snitzer --- include/linux/dax.h | 5 +---- include/linux/device-mapper.h | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index df97b7af7e2c..0d8f35f6c53d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,8 +19,6 @@ struct dax_operations { /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); - /* flush: optional driver-specific cache management after writes */ - void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; extern struct attribute_group dax_attribute_group; @@ -84,8 +82,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); -void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, - size_t size); +void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3b5fdf308148..a5538433c927 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -134,8 +134,6 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); -typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, - size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -186,7 +184,6 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_copy_from_iter_fn dax_copy_from_iter; - dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; -- cgit From f8e9ec16611baa8db77a7d46facd2ba7aa525955 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 7 Sep 2017 17:36:36 -0700 Subject: block: tolerate tracing of NULL bio __get_request() can call trace_block_getrq() with bio=NULL which causes block_get_rq::TP_fast_assign() to deref a NULL pointer and panic. Syzkaller fuzzer panics with linux-next (1d53d908b79d7870d89063062584eead4cf83448): kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 0 PID: 2983 Comm: syzkaller401111 Not tainted 4.13.0-rc7-next-20170901+ #13 task: ffff8801cf1da000 task.stack: ffff8801ce440000 RIP: 0010:perf_trace_block_get_rq+0x697/0x970 include/trace/events/block.h:384 RSP: 0018:ffff8801ce4473f0 EFLAGS: 00010246 RAX: ffff8801cf1da000 RBX: 1ffff10039c88e84 RCX: 1ffffd1ffff84d27 RDX: dffffc0000000001 RSI: 1ffff1003b643e7a RDI: ffffe8ffffc26938 RBP: ffff8801ce447530 R08: 1ffff1003b643e6c R09: ffffe8ffffc26964 R10: 0000000000000002 R11: fffff91ffff84d2d R12: ffffe8ffffc1f890 R13: ffffe8ffffc26930 R14: ffffffff85cad9e0 R15: 0000000000000000 FS: 0000000002641880(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000043e670 CR3: 00000001d1d7a000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: trace_block_getrq include/trace/events/block.h:423 [inline] __get_request block/blk-core.c:1283 [inline] get_request+0x1518/0x23b0 block/blk-core.c:1355 blk_old_get_request block/blk-core.c:1402 [inline] blk_get_request+0x1d8/0x3c0 block/blk-core.c:1427 sg_scsi_ioctl+0x117/0x750 block/scsi_ioctl.c:451 sg_ioctl+0x192d/0x2ed0 drivers/scsi/sg.c:1070 vfs_ioctl fs/ioctl.c:45 [inline] do_vfs_ioctl+0x1b1/0x1530 fs/ioctl.c:685 SYSC_ioctl fs/ioctl.c:700 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691 entry_SYSCALL_64_fastpath+0x1f/0xbe block_get_rq::TP_fast_assign() has multiple redundant ->dev assignments. Only one of them is NULL tolerant. Favor the NULL tolerant one. Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Greg Thelen Signed-off-by: Jens Axboe --- include/trace/events/block.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index f815aaaef755..1fd7ff1a46f7 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -397,7 +397,6 @@ DECLARE_EVENT_CLASS(block_get_rq, TP_fast_assign( __entry->dev = bio ? bio_dev(bio) : 0; - __entry->dev = bio_dev(bio); __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, @@ -414,7 +413,7 @@ DECLARE_EVENT_CLASS(block_get_rq, /** * block_getrq - get a free request entry in queue for block IO operations * @q: queue for operations - * @bio: pending block IO operation + * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * A request struct for queue @q has been allocated to handle the @@ -430,7 +429,7 @@ DEFINE_EVENT(block_get_rq, block_getrq, /** * block_sleeprq - waiting to get a free request entry in queue for block IO operation * @q: queue for operation - * @bio: pending block IO operation + * @bio: pending block IO operation (can be %NULL) * @rw: low bit indicates a read (%0) or a write (%1) * * In the case where a request struct cannot be provided for queue @q -- cgit From 044a9df1a7cbb89f48fcc0e9e39997989342966b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Sep 2017 12:09:28 -0400 Subject: nvme-pci: implement the HMB entry number and size limitations Adds support for the new Host Memory Buffer Minimum Descriptor Entry Size and Host Memory Maximum Descriptors Entries field that were added in TP 4002 HMB Enhancements. These allow the controller to advertise limits for the usual number of segments in the host memory buffer, as well as a minimum usable per-segment size. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- include/linux/nvme.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5144f9103723..87723c86f136 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -226,7 +226,9 @@ struct nvme_id_ctrl { __le16 mntmt; __le16 mxtmt; __le32 sanicap; - __u8 rsvd332[180]; + __le32 hmminds; + __le16 hmmaxd; + __u8 rsvd338[174]; __u8 sqes; __u8 cqes; __le16 maxcmd; -- cgit From 1359798f9d4082eb04575efdd19512fbd9c28464 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 6 Sep 2017 14:36:57 +0200 Subject: string.h: un-fortify memcpy_and_pad The way I'd implemented the new helper memcpy_and_pad with __FORTIFY_INLINE caused compiler warnings for certain kernel configurations. This helper is only used in a single place at this time, and thus doesn't benefit much from fortification. So simplify the code by dropping fortification support for now. Fixes: 01f33c336e2d "string.h: add memcpy_and_pad()" Signed-off-by: Martin Wilck Acked-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- include/linux/string.h | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index e1eeb0a8a969..54d21783e18d 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -434,20 +434,9 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) * @count: The number of bytes to copy * @pad: Character to use for padding if space is left in destination. */ -__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len, - const void *src, size_t count, int pad) +static inline void memcpy_and_pad(void *dest, size_t dest_len, + const void *src, size_t count, int pad) { - size_t dest_size = __builtin_object_size(dest, 0); - size_t src_size = __builtin_object_size(src, 0); - - if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) { - if (dest_size < dest_len && dest_size < count) - __write_overflow(); - else if (src_size < dest_len && src_size < count) - __read_overflow3(); - } - if (dest_size < dest_len) - fortify_panic(__func__); if (dest_len > count) { memcpy(dest, src, count); memset(dest + count, pad, dest_len - count); -- cgit From 609320c8a22715b74b39796930c3542719f8ab62 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 7 Sep 2017 18:36:15 -0700 Subject: perf/bpf: fix a clang compilation issue clang does not support variable length array for structure member. It has the following error during compilation: kernel/trace/trace_syscalls.c:568:17: error: fields must have a constant size: 'variable length array in structure' extension will never be supported unsigned long args[sys_data->nb_args]; ^ The fix is to use a fixed array length instead. Reported-by: Nick Desaulniers Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88951b795ee3..95606a2d556f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -200,6 +200,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE_MAXARGS 6 + #define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) -- cgit From 96c5508e3012ed0984ab93821d64ac1ff3279c09 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sun, 10 Sep 2017 09:47:02 +0200 Subject: xdp: implement xdp_redirect_map for generic XDP Using bpf_redirect_map is allowed for generic XDP programs, but the appropriate map lookup was never performed in xdp_do_generic_redirect(). Instead the map-index is directly used as the ifindex. For the xdp_redirect_map sample in SKB-mode '-S', this resulted in trying sending on ifindex 0 which isn't valid, resulting in getting SKB packets dropped. Thus, the reported performance numbers are wrong in commit 24251c264798 ("samples/bpf: add option for native and skb mode for redirect apps") for the 'xdp_redirect_map -S' case. Before commit 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") it could crash the kernel. Like this commit also check that the map_owner owner is correct before dereferencing the map pointer. But make sure that this API misusage can be caught by a tracepoint. Thus, allowing userspace via tracepoints to detect misbehaving bpf_progs. Fixes: 6103aa96ec07 ("net: implement XDP_REDIRECT for xdp generic") Fixes: 24251c264798 ("samples/bpf: add option for native and skb mode for redirect apps") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/xdp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 862575ac8da9..4e16c43fba10 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -138,11 +138,11 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ - 0, map, idx); + 0, map, idx) #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ - err, map, idx); + err, map, idx) #endif /* _TRACE_XDP_H */ -- cgit From d7fb60b9cafb982cb2e46a267646a8dfd4f2e5da Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 11 Sep 2017 16:33:30 -0700 Subject: net_sched: get rid of tcfa_rcu gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller is no longer needed to wait for a grace period. So this patch gets rid of it. This also completely closes a race condition between action free path and filter chain add/remove path for the following patch. Because otherwise the nested RCU callback can't be caught by rcu_barrier(). Please see also the comments in code. Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: Eric Dumazet Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8f3d5d8b5ae0..b944e0eb93be 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -34,7 +34,6 @@ struct tc_action { struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; @@ -50,7 +49,6 @@ struct tc_action { #define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock -#define tcf_rcu common.tcfa_rcu /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. -- cgit From fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 14 Sep 2017 02:00:54 +0300 Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled() This code causes a static checker warning because Smatch doesn't trust anything that comes from skb->data. I've reviewed this code and I do think skb->data can be controlled by the user here. The sctp_event_subscribe struct has 13 __u8 fields and we want to see if ours is non-zero. sn_type can be any value in the 0-USHRT_MAX range. We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read either before the start of the struct or after the end. This is a very old bug and it's surprising that it would go undetected for so long but my theory is that it just doesn't have a big impact so it would be hard to notice. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 1060494ac230..b8c86ec1a8f5 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ -- cgit From 0ee931c4e31a5efb134c76440405e9219f896e33 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 13 Sep 2017 16:28:29 -0700 Subject: mm: treewide: remove GFP_TEMPORARY allocation flag GFP_TEMPORARY was introduced by commit e12ba74d8ff3 ("Group short-lived and reclaimable kernel allocations") along with __GFP_RECLAIMABLE. It's primary motivation was to allow users to tell that an allocation is short lived and so the allocator can try to place such allocations close together and prevent long term fragmentation. As much as this sounds like a reasonable semantic it becomes much less clear when to use the highlevel GFP_TEMPORARY allocation flag. How long is temporary? Can the context holding that memory sleep? Can it take locks? It seems there is no good answer for those questions. The current implementation of GFP_TEMPORARY is basically GFP_KERNEL | __GFP_RECLAIMABLE which in itself is tricky because basically none of the existing caller provide a way to reclaim the allocated memory. So this is rather misleading and hard to evaluate for any benefits. I have checked some random users and none of them has added the flag with a specific justification. I suspect most of them just copied from other existing users and others just thought it might be a good idea to use without any measuring. This suggests that GFP_TEMPORARY just motivates for cargo cult usage without any reasoning. I believe that our gfp flags are quite complex already and especially those with highlevel semantic should be clearly defined to prevent from confusion and abuse. Therefore I propose dropping GFP_TEMPORARY and replace all existing users to simply use GFP_KERNEL. Please note that SLAB users with shrinkers will still get __GFP_RECLAIMABLE heuristic and so they will be placed properly for memory fragmentation prevention. I can see reasons we might want some gfp flag to reflect shorterm allocations but I propose starting from a clear semantic definition and only then add users with proper justification. This was been brought up before LSF this year by Matthew [1] and it turned out that GFP_TEMPORARY really doesn't have a clear semantic. It seems to be a heuristic without any measured advantage for most (if not all) its current users. The follow up discussion has revealed that opinions on what might be temporary allocation differ a lot between developers. So rather than trying to tweak existing users into a semantic which they haven't expected I propose to simply remove the flag and start from scratch if we really need a semantic for short term allocations. [1] http://lkml.kernel.org/r/20170118054945.GD18349@bombadil.infradead.org [akpm@linux-foundation.org: fix typo] [akpm@linux-foundation.org: coding-style fixes] [sfr@canb.auug.org.au: drm/i915: fix up] Link: http://lkml.kernel.org/r/20170816144703.378d4f4d@canb.auug.org.au Link: http://lkml.kernel.org/r/20170728091904.14627-1-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Stephen Rothwell Acked-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Matthew Wilcox Cc: Neil Brown Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 -- include/trace/events/mmflags.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bcfb9f7c46f5..f780718b7391 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -288,8 +288,6 @@ struct vm_area_struct; #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ - __GFP_RECLAIMABLE) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_DMA __GFP_DMA #define GFP_DMA32 __GFP_DMA32 diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 4c2e4737d7bc..fec6291a6703 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -18,7 +18,6 @@ {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"},\ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ - {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ {(unsigned long)GFP_KERNEL_ACCOUNT, "GFP_KERNEL_ACCOUNT"}, \ {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ -- cgit From 488e32f1981e506976d666b3772d6ccc8b726fee Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 14 Sep 2017 11:50:07 +0200 Subject: KVM: trace events: update list of exit reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding entries for exit reasons 23 - 27: KVM_EXIT_EPR KVM_EXIT_SYSTEM_EVENT KVM_EXIT_S390_STSI KVM_EXIT_IOAPIC_EOI KVM_EXIT_HYPERV Signed-off-by: Ladi Prosek Reviewed-by: Cornelia Huck Signed-off-by: Radim Krčmář --- include/trace/events/kvm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8ade3eb6c640..dcffedfac431 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -14,7 +14,9 @@ ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \ ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ - ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH) + ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\ + ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \ + ERSN(HYPERV) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), -- cgit From 2554db916586b228ce93e6f74a12fd7fe430a004 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 25 Aug 2017 09:13:54 -0700 Subject: sched/wait: Break up long wake list walk We encountered workloads that have very long wake up list on large systems. A waker takes a long time to traverse the entire wake list and execute all the wake functions. We saw page wait list that are up to 3700+ entries long in tests of large 4 and 8 socket systems. It took 0.8 sec to traverse such list during wake up. Any other CPU that contends for the list spin lock will spin for a long time. It is a result of the numa balancing migration of hot pages that are shared by many threads. Multiple CPUs waking are queued up behind the lock, and the last one queued has to wait until all CPUs did all the wakeups. The page wait list is traversed with interrupt disabled, which caused various problems. This was the original cause that triggered the NMI watch dog timer in: https://patchwork.kernel.org/patch/9800303/ . Only extending the NMI watch dog timer there helped. This patch bookmarks the waker's scan position in wake list and break the wake up walk, to allow access to the list before the waker resume its walk down the rest of the wait list. It lowers the interrupt and rescheduling latency. This patch also provides a performance boost when combined with the next patch to break up page wakeup list walk. We saw 22% improvement in the will-it-scale file pread2 test on a Xeon Phi system running 256 threads. [ v2: Merged in Linus' changes to remove the bookmark_wake_function, and simply access to flags. ] Reported-by: Kan Liang Tested-by: Kan Liang Signed-off-by: Tim Chen Signed-off-by: Linus Torvalds --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index dc19880c02f5..78401ef02d29 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -18,6 +18,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int /* wait_queue_entry::flags */ #define WQ_FLAG_EXCLUSIVE 0x01 #define WQ_FLAG_WOKEN 0x02 +#define WQ_FLAG_BOOKMARK 0x04 /* * A single wait-queue entry structure: -- cgit From 11a19c7b099f96d00a8dec52bfbb8475e89b6745 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 25 Aug 2017 09:13:55 -0700 Subject: sched/wait: Introduce wakeup boomark in wake_up_page_bit Now that we have added breaks in the wait queue scan and allow bookmark on scan position, we put this logic in the wake_up_page_bit function. We can have very long page wait list in large system where multiple pages share the same wait list. We break the wake up walk here to allow other cpus a chance to access the list, and not to disable the interrupts when traversing the list for too long. This reduces the interrupt and rescheduling latency, and excessive page wait queue lock hold time. [ v2: Remove bookmark_wake_function ] Signed-off-by: Tim Chen Signed-off-by: Linus Torvalds --- include/linux/wait.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 78401ef02d29..87c4641023fb 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -185,6 +185,8 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); +void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, + unsigned int mode, void *key, wait_queue_entry_t *bookmark); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); -- cgit From 711aab1dbb324d321e3d84368a435a78908c7bce Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 12 Sep 2017 22:45:33 -0400 Subject: vfs: constify path argument to kernel_read_file_from_path This patch constifies the path argument to kernel_read_file_from_path(). Signed-off-by: Mimi Zohar Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bc475dfeb4ce..339e73742e73 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2815,7 +2815,7 @@ static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) extern int kernel_read_file(struct file *, void **, loff_t *, loff_t, enum kernel_read_file_id); -extern int kernel_read_file_from_path(char *, void **, loff_t *, loff_t, +extern int kernel_read_file_from_path(const char *, void **, loff_t *, loff_t, enum kernel_read_file_id); extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); -- cgit From 8cd641e3c7cbf86c7cbd2a17a160dd137d86c860 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 13 Sep 2017 13:08:18 -0700 Subject: sched/wait: Add swq_has_sleeper() Which is the equivalent of what we have in regular waitqueues. I'm not crazy about the name, but this also helps us get both apis closer -- which iirc comes originally from the -net folks. We also duplicate the comments for the lockless swait_active(), from wait.h. Future users will make use of this interface. Signed-off-by: Davidlohr Bueso Signed-off-by: Paolo Bonzini --- include/linux/swait.h | 58 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swait.h b/include/linux/swait.h index 4a4e180d0a35..73e97a08d3d0 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -79,9 +79,63 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name DECLARE_SWAIT_QUEUE_HEAD(name) #endif -static inline int swait_active(struct swait_queue_head *q) +/** + * swait_active -- locklessly test for waiters on the queue + * @wq: the waitqueue to test for waiters + * + * returns true if the wait list is not empty + * + * NOTE: this function is lockless and requires care, incorrect usage _will_ + * lead to sporadic and non-obvious failure. + * + * NOTE2: this function has the same above implications as regular waitqueues. + * + * Use either while holding swait_queue_head::lock or when used for wakeups + * with an extra smp_mb() like: + * + * CPU0 - waker CPU1 - waiter + * + * for (;;) { + * @cond = true; prepare_to_swait(&wq_head, &wait, state); + * smp_mb(); // smp_mb() from set_current_state() + * if (swait_active(wq_head)) if (@cond) + * wake_up(wq_head); break; + * schedule(); + * } + * finish_swait(&wq_head, &wait); + * + * Because without the explicit smp_mb() it's possible for the + * swait_active() load to get hoisted over the @cond store such that we'll + * observe an empty wait list while the waiter might not observe @cond. + * This, in turn, can trigger missing wakeups. + * + * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), + * which (when the lock is uncontended) are of roughly equal cost. + */ +static inline int swait_active(struct swait_queue_head *wq) +{ + return !list_empty(&wq->task_list); +} + +/** + * swq_has_sleeper - check if there are any waiting processes + * @wq: the waitqueue to test for waiters + * + * Returns true if @wq has waiting processes + * + * Please refer to the comment for swait_active. + */ +static inline bool swq_has_sleeper(struct swait_queue_head *wq) { - return !list_empty(&q->task_list); + /* + * We need to be sure we are in sync with the list_add() + * modifications to the wait queue (task_list). + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return swait_active(wq); } extern void swake_up(struct swait_queue_head *q); -- cgit From d25adbeb0cdb860fb39e09cdd025e9cfc954c5ab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Sep 2017 11:02:21 +0800 Subject: sctp: fix an use-after-free issue in sctp_sock_dump Commit 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") tried to fix an use-after-free issue by checking !sctp_sk(sk)->ep with holding sock and sock lock. But Paolo noticed that endpoint could be destroyed in sctp_rcv without sock lock protection. It means the use-after-free issue still could be triggered when sctp_rcv put and destroy ep after sctp_sock_dump checks !ep, although it's pretty hard to reproduce. I could reproduce it by mdelay in sctp_rcv while msleep in sctp_close and sctp_sock_dump long time. This patch is to add another param cb_done to sctp_for_each_transport and dump ep->assocs with holding tsp after jumping out of transport's traversal in it to avoid this issue. It can also improve sctp diag dump to make it run faster, as no need to save sk into cb->args[5] and keep calling sctp_for_each_transport any more. This patch is also to use int * instead of int for the pos argument in sctp_for_each_transport, which could make postion increment only in sctp_for_each_transport and no need to keep changing cb->args[2] in sctp_sock_filter and sctp_sock_dump any more. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Reported-by: Paolo Abeni Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 06b4f515e157..d7d8cba01469 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -127,7 +127,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p); + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); -- cgit From bf29ed1567b67854dc13504f685c45a2ea9b2081 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Thu, 7 Sep 2017 08:30:44 -0700 Subject: syscalls: Use CHECK_DATA_CORRUPTION for addr_limit_user_check Use CHECK_DATA_CORRUPTION instead of BUG_ON to provide more flexibility on address limit failures. By default, send a SIGKILL signal to kill the current process preventing exploitation of a bad address limit. Make the TIF_FSCHECK flag optional so ARM can use this function. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Pratyush Anand Cc: Dave Martin Cc: Will Drewry Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: Andy Lutomirski Cc: David Howells Cc: Dave Hansen Cc: Al Viro Cc: linux-api@vger.kernel.org Cc: Yonghong Song Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1504798247-48833-2-git-send-email-keescook@chromium.org --- include/linux/syscalls.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 95606a2d556f..a78186d826d7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -221,21 +221,25 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) } \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#ifdef TIF_FSCHECK /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. */ static inline void addr_limit_user_check(void) { - +#ifdef TIF_FSCHECK if (!test_thread_flag(TIF_FSCHECK)) return; +#endif - BUG_ON(!segment_eq(get_fs(), USER_DS)); + if (CHECK_DATA_CORRUPTION(!segment_eq(get_fs(), USER_DS), + "Invalid address limit on user-mode return")) + force_sig(SIGKILL, current); + +#ifdef TIF_FSCHECK clear_thread_flag(TIF_FSCHECK); -} #endif +} asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr); -- cgit From 74378c5c8cdaf0ce9f65e67cbd0613286f2c3bad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Sep 2017 20:16:27 +0200 Subject: driver core: Fix link to device power management documentation Correct location as of commit 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST). Fixes: 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST) Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index c6f27207dbe8..1d2607923a24 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -838,7 +838,7 @@ struct dev_links_info { * @driver_data: Private pointer for driver specific info. * @links: Links to suppliers and consumers of this device. * @power: For device power management. - * See Documentation/power/admin-guide/devices.rst for details. + * See Documentation/driver-api/pm/devices.rst for details. * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. -- cgit From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 18 Sep 2017 11:05:16 -0700 Subject: tcp: remove two unused functions remove tcp_may_send_now and tcp_snd_test that are no longer used Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..3bc910a9bfc6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); -bool tcp_may_send_now(struct sock *sk); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); -- cgit From 0555ac4333d7da7cff83d5b06bd3679a3e1ef584 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Mon, 18 Sep 2017 16:35:32 -0600 Subject: xen, arm64: drop dummy lookup_address() This is unused, and conflicts with the definition that we'll add for XPFO. Signed-off-by: Tycho Andersen Reviewed-by: Julien Grall CC: Boris Ostrovsky CC: Juergen Gross CC: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- include/xen/arm/page.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h index 415dbc6e43fd..6adc2a955340 100644 --- a/include/xen/arm/page.h +++ b/include/xen/arm/page.h @@ -84,16 +84,6 @@ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr) BUG(); } -/* TODO: this shouldn't be here but it is because the frontend drivers - * are using it (its rolled in headers) even though we won't hit the code path. - * So for right now just punt with this. - */ -static inline pte_t *lookup_address(unsigned long address, unsigned int *level) -{ - BUG(); - return NULL; -} - extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -- cgit From aa767cfb75341a6b93742f4d7d1589ac2532c29e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Sep 2017 22:07:50 +0200 Subject: of: provide inline helper for of_find_device_by_node The ipmmu-vmsa driver fails in compile-testing on non-OF platforms: drivers/iommu/ipmmu-vmsa.o: In function `ipmmu_of_xlate': ipmmu-vmsa.c:(.text+0x740): undefined reference to `of_find_device_by_node' It would be reasonable to assume that this interface works but returns failure on non-OF builds, like it does on machines that have been booted in another way, so this adds another inline function helper. Fixes: 7b2d59611fef ("iommu/ipmmu-vmsa: Replace local utlb code with fwspec ids") Signed-off-by: Arnd Bergmann Signed-off-by: Rob Herring --- include/linux/of_platform.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index e0d1946270f3..fb908e598348 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -57,7 +57,14 @@ extern const struct of_device_id of_default_bus_match_table[]; extern struct platform_device *of_device_alloc(struct device_node *np, const char *bus_id, struct device *parent); +#ifdef CONFIG_OF extern struct platform_device *of_find_device_by_node(struct device_node *np); +#else +static inline struct platform_device *of_find_device_by_node(struct device_node *np) +{ + return NULL; +} +#endif /* Platform devices and busses creation */ extern struct platform_device *of_platform_device_create(struct device_node *np, -- cgit From 9e987b70ada27554c5d176421de1d167218c49b5 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 15 Sep 2017 17:35:27 -0700 Subject: ACPI / bus: Make ACPI_HANDLE() work for non-GPL code again Due to commit db3e50f3234b (device property: Get rid of struct fwnode_handle type field), ACPI_HANDLE() inadvertently became a GPL-only call. The call path that led to that was: ACPI_HANDLE() ACPI_COMPANION() to_acpi_device_node() is_acpi_device_node() acpi_device_fwnode_ops DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops); ...and the new DECLARE_ACPI_FWNODE_OPS() includes EXPORT_SYMBOL_GPL, whereas previously it was a static struct. In order to avoid changing any of that, let's instead provide ever so slightly better encapsulation of those struct fwnode_operations instances. Those do not really need to be directly used in inline function calls in header files. Simply moving two small functions (is_acpi_device_node and is_acpi_data_node) out of acpi_bus.h, and into a .c file, does that. That leaves the internals of struct fwnode_operations as GPL-only (which I think was the intent all along), but un-breaks any driver code out there that relies on the ACPI subsystem's being (historically) an EXPORT_SYMBOL-usable system. By that, I mean, ACPI_HANDLE() and other basic ACPI calls were non-GPL-protected. Also, while I'm there, remove a tiny bit of redundancy that was missed in the earlier commit, by having is_acpi_node() use the other two routines, instead of checking fwnode directly. Fixes: db3e50f3234b (device property: Get rid of struct fwnode_handle type field) Signed-off-by: John Hubbard Acked-by: Sakari Ailus Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index dedf9d789166..fa1505292f6c 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -399,17 +399,12 @@ extern const struct fwnode_operations acpi_device_fwnode_ops; extern const struct fwnode_operations acpi_data_fwnode_ops; extern const struct fwnode_operations acpi_static_fwnode_ops; +bool is_acpi_device_node(const struct fwnode_handle *fwnode); +bool is_acpi_data_node(const struct fwnode_handle *fwnode); + static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && - (fwnode->ops == &acpi_device_fwnode_ops - || fwnode->ops == &acpi_data_fwnode_ops); -} - -static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) -{ - return !IS_ERR_OR_NULL(fwnode) && - fwnode->ops == &acpi_device_fwnode_ops; + return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode)); } #define to_acpi_device_node(__fwnode) \ @@ -422,11 +417,6 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) NULL; \ }) -static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) -{ - return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; -} - #define to_acpi_data_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \ -- cgit From ec9dd352d591f0c90402ec67a317c1ed4fb2e638 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 18 Sep 2017 16:38:36 -0700 Subject: bpf: one perf event close won't free bpf program attached by another perf event This patch fixes a bug exhibited by the following scenario: 1. fd1 = perf_event_open with attr.config = ID1 2. attach bpf program prog1 to fd1 3. fd2 = perf_event_open with attr.config = ID1 4. user program closes fd2 and prog1 is detached from the tracepoint. 5. user program with fd1 does not work properly as tracepoint no output any more. The issue happens at step 4. Multiple perf_event_open can be called successfully, but only one bpf prog pointer in the tp_event. In the current logic, any fd release for the same tp_event will free the tp_event->prog. The fix is to free tp_event->prog only when the closing fd corresponds to the one which registered the program. Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/trace_events.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 7f11050746ae..2e0f22298fe9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -272,6 +272,7 @@ struct trace_event_call { int perf_refcount; struct hlist_head __percpu *perf_events; struct bpf_prog *prog; + struct perf_event *bpf_prog_owner; int (*perf_perm)(struct trace_event_call *, struct perf_event *); -- cgit From 0551968add53777fddd18f4ffb4e3bbc1f646d79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Sep 2017 11:54:44 +0200 Subject: Revert "genirq: Restrict effective affinity to interrupts actually using it" This reverts commit 74def747bcd09692bdbf8c6a15350795b0f11ca8. The change to the helper function is only correct for the /proc/irq/ readout usage, but breaks the existing x86 usage of that function. Reported-by: Yanko Kaneti Signed-off-by: Thomas Gleixner Cc: Marc Zyngier --- include/linux/irq.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index b99a784635ff..d4728bf6a537 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -783,10 +783,7 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) static inline struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) { - if (!cpumask_empty(d->common->effective_affinity)) - return d->common->effective_affinity; - - return d->common->affinity; + return d->common->effective_affinity; } static inline void irq_data_update_effective_affinity(struct irq_data *d, const struct cpumask *m) -- cgit From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Sep 2017 15:52:13 -0700 Subject: net: ethtool: Add back transceiver type Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") deprecated the ethtool_cmd::transceiver field, which was fine in premise, except that the PHY library was actually using it to report the type of transceiver: internal or external. Use the first word of the reserved field to put this __u8 transceiver field back in. It is made read-only, and we don't expect the ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this is mostly for the legacy path where we do: ethtool_get_settings() -> dev->ethtool_ops->get_link_ksettings() -> convert_link_ksettings_to_legacy_settings() to have no information loss compared to the legacy get_settings API. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9c041dae8e2c..5bd1b1de4ea0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1753,6 +1753,8 @@ enum ethtool_reset_flags { * %ethtool_link_mode_bit_indices for the link modes, and other * link features that the link partner advertised through * autonegotiation; 0 if unknown or not applicable. Read-only. + * @transceiver: Used to distinguish different possible PHY types, + * reported consistently by PHYLIB. Read-only. * * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple @@ -1804,7 +1806,9 @@ struct ethtool_link_settings { __u8 eth_tp_mdix; __u8 eth_tp_mdix_ctrl; __s8 link_mode_masks_nwords; - __u32 reserved[8]; + __u8 transceiver; + __u8 reserved1[3]; + __u32 reserved[7]; __u32 link_mode_masks[0]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; -- cgit From e8b95728f724797f958912fd9b765a695595d3a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 1 Sep 2017 17:13:43 -0700 Subject: Input: uinput - avoid FF flush when destroying device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, when input device supporting force feedback effects is being destroyed, we try to "flush" currently playing effects, so that the physical device does not continue vibrating (or executing other effects). Unfortunately this does not work well for uinput as flushing of the effects deadlocks with the destroy action: - if device is being destroyed because the file descriptor is being closed, then there is noone to even service FF requests; - if device is being destroyed because userspace sent UI_DEV_DESTROY, while theoretically it could be possible to service FF requests, userspace is unlikely to do so (they'd need to make sure FF handling happens on a separate thread) even if kernel solves the issue with FF ioctls deadlocking with UI_DEV_DESTROY ioctl on udev->mutex. To avoid lockups like the one below, let's install a custom input device flush handler, and avoid trying to flush force feedback effects when we destroying the device, and instead rely on uinput to shut off the device properly. NMI watchdog: Watchdog detected hard LOCKUP on cpu 3 ... <> [] _raw_spin_lock_irqsave+0x37/0x40 [] complete+0x1d/0x50 [] uinput_request_done+0x3c/0x40 [uinput] [] uinput_request_submit.part.7+0x47/0xb0 [uinput] [] uinput_dev_erase_effect+0x5b/0x76 [uinput] [] erase_effect+0xad/0xf0 [] flush_effects+0x4d/0x90 [] input_flush_device+0x40/0x60 [] evdev_cleanup+0xac/0xc0 [] evdev_disconnect+0x2b/0x60 [] __input_unregister_device+0xac/0x150 [] input_unregister_device+0x47/0x70 [] uinput_destroy_device+0xb5/0xc0 [uinput] [] uinput_ioctl_handler.isra.9+0x65e/0x740 [uinput] [] ? do_futex+0x12b/0xad0 [] uinput_ioctl+0x18/0x20 [uinput] [] do_vfs_ioctl+0x298/0x480 [] ? security_file_ioctl+0x43/0x60 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Reported-by: Rodrigo Rivas Costa Reported-by: Clément VUCHENER Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193741 Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index a65e3b24fb18..fb5e23c7ed98 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file); int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file); +int input_ff_flush(struct input_dev *dev, struct file *file); int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)); -- cgit From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2017 09:15:46 -0700 Subject: net: prevent dst uses after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In linux-4.13, Wei worked hard to convert dst to a traditional refcounted model, removing GC. We now want to make sure a dst refcount can not transition from 0 back to 1. The problem here is that input path attached a not refcounted dst to an skb. Then later, because packet is forwarded and hits skb_dst_force() before exiting RCU section, we might try to take a refcount on one dst that is about to be freed, if another cpu saw 1 -> 0 transition in dst_release() and queued the dst for freeing after one RCU grace period. Lets unify skb_dst_force() and skb_dst_force_safe(), since we should always perform the complete check against dst refcount, and not assume it is not zero. Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005 [ 989.919496] skb_dst_force+0x32/0x34 [ 989.919498] __dev_queue_xmit+0x1ad/0x482 [ 989.919501] ? eth_header+0x28/0xc6 [ 989.919502] dev_queue_xmit+0xb/0xd [ 989.919504] neigh_connected_output+0x9b/0xb4 [ 989.919507] ip_finish_output2+0x234/0x294 [ 989.919509] ? ipt_do_table+0x369/0x388 [ 989.919510] ip_finish_output+0x12c/0x13f [ 989.919512] ip_output+0x53/0x87 [ 989.919513] ip_forward_finish+0x53/0x5a [ 989.919515] ip_forward+0x2cb/0x3e6 [ 989.919516] ? pskb_trim_rcsum.part.9+0x4b/0x4b [ 989.919518] ip_rcv_finish+0x2e2/0x321 [ 989.919519] ip_rcv+0x26f/0x2eb [ 989.919522] ? vlan_do_receive+0x4f/0x289 [ 989.919523] __netif_receive_skb_core+0x467/0x50b [ 989.919526] ? tcp_gro_receive+0x239/0x239 [ 989.919529] ? inet_gro_receive+0x226/0x238 [ 989.919530] __netif_receive_skb+0x4d/0x5f [ 989.919532] netif_receive_skb_internal+0x5c/0xaf [ 989.919533] napi_gro_receive+0x45/0x81 [ 989.919536] ixgbe_poll+0xc8a/0xf09 [ 989.919539] ? kmem_cache_free_bulk+0x1b6/0x1f7 [ 989.919540] net_rx_action+0xf4/0x266 [ 989.919543] __do_softirq+0xa8/0x19d [ 989.919545] irq_exit+0x5d/0x6b [ 989.919546] do_IRQ+0x9c/0xb5 [ 989.919548] common_interrupt+0x93/0x93 [ 989.919548] Similarly dst_clone() can use dst_hold() helper to have additional debugging, as a follow up to commit 44ebe79149ff ("net: add debug atomic_inc_not_zero() in dst_hold()") In net-next we will convert dst atomic_t to refcount_t for peace of mind. Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag") Signed-off-by: Eric Dumazet Cc: Wei Wang Reported-by: Paweł Staszewski Bisected-by: Paweł Staszewski Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/dst.h | 22 ++++------------------ include/net/route.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 93568bd0a352..06a6765da074 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) - atomic_inc(&dst->__refcnt); + dst_hold(dst); return dst; } @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb __skb_dst_copy(nskb, oskb->_skb_refdst); } -/** - * skb_dst_force - makes sure skb dst is refcounted - * @skb: buffer - * - * If dst is not yet refcounted, let's do it - */ -static inline void skb_dst_force(struct sk_buff *skb) -{ - if (skb_dst_is_noref(skb)) { - WARN_ON(!rcu_read_lock_held()); - skb->_skb_refdst &= ~SKB_DST_NOREF; - dst_clone(skb_dst(skb)); - } -} - /** * dst_hold_safe - Take a reference on a dst if possible * @dst: pointer to dst entry @@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst) } /** - * skb_dst_force_safe - makes sure skb dst is refcounted + * skb_dst_force - makes sure skb dst is refcounted * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. */ -static inline void skb_dst_force_safe(struct sk_buff *skb) +static inline void skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); + WARN_ON(!rcu_read_lock_held()); if (!dst_hold_safe(dst)) dst = NULL; diff --git a/include/net/route.h b/include/net/route.h index 1b09a9368c68..57dfc6850d37 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) { - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!skb_dst(skb)) err = -EINVAL; } diff --git a/include/net/sock.h b/include/net/sock.h index 03a362568357..a6b9a8d1a6df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; -- cgit From 237bbd29f7a049d310d907f4b2716a7feef9abf3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Sep 2017 11:37:03 -0700 Subject: KEYS: prevent creating a different user's keyrings It was possible for an unprivileged user to create the user and user session keyrings for another user. For example: sudo -u '#3000' sh -c 'keyctl add keyring _uid.4000 "" @u keyctl add keyring _uid_ses.4000 "" @u sleep 15' & sleep 1 sudo -u '#4000' keyctl describe @u sudo -u '#4000' keyctl describe @us This is problematic because these "fake" keyrings won't have the right permissions. In particular, the user who created them first will own them and will have full access to them via the possessor permissions, which can be used to compromise the security of a user's keys: -4: alswrv-----v------------ 3000 0 keyring: _uid.4000 -5: alswrv-----v------------ 3000 0 keyring: _uid_ses.4000 Fix it by marking user and user session keyrings with a flag KEY_FLAG_UID_KEYRING. Then, when searching for a user or user session keyring by name, skip all keyrings that don't have the flag set. Fixes: 69664cf16af4 ("keys: don't generate user and user session keyrings unless they're accessed") Cc: [v2.6.26+] Signed-off-by: Eric Biggers Signed-off-by: David Howells --- include/linux/key.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/key.h b/include/linux/key.h index 044114185120..e315e16b6ff8 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -187,6 +187,7 @@ struct key { #define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ #define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 10 /* set if key should not be removed */ +#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -243,6 +244,7 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ +#define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); -- cgit From 5acb3cc2c2e9d3020a4fee43763c6463767f1572 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 20 Sep 2017 13:12:20 -0600 Subject: blktrace: Fix potential deadlock between delete & sysfs ops The lockdep code had reported the following unsafe locking scenario: CPU0 CPU1 ---- ---- lock(s_active#228); lock(&bdev->bd_mutex/1); lock(s_active#228); lock(&bdev->bd_mutex); *** DEADLOCK *** The deadlock may happen when one task (CPU1) is trying to delete a partition in a block device and another task (CPU0) is accessing tracing sysfs file (e.g. /sys/block/dm-1/trace/act_mask) in that partition. The s_active isn't an actual lock. It is a reference count (kn->count) on the sysfs (kernfs) file. Removal of a sysfs file, however, require a wait until all the references are gone. The reference count is treated like a rwsem using lockdep instrumentation code. The fact that a thread is in the sysfs callback method or in the ioctl call means there is a reference to the opended sysfs or device file. That should prevent the underlying block structure from being removed. Instead of using bd_mutex in the block_device structure, a new blk_trace_mutex is now added to the request_queue structure to protect access to the blk_trace structure. Suggested-by: Christoph Hellwig Signed-off-by: Waiman Long Acked-by: Steven Rostedt (VMware) Fix typo in patch subject line, and prune a comment detailing how the code used to work. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 460294bb0fa5..02fa42d24b52 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -551,6 +551,7 @@ struct request_queue { int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace *blk_trace; + struct mutex blk_trace_mutex; #endif /* * for flush operations -- cgit From c98cb3bd882119e7e1a7c8df2f1eacfcc701450b Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 7 Sep 2017 16:27:25 -0700 Subject: nvme.h: remove FC transport-specific error values The NVM express group recinded the reserved range for the transport. Remove the FC-centric values that had been defined. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 87723c86f136..2440be32be1d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1127,19 +1127,6 @@ enum { NVME_SC_UNWRITTEN_BLOCK = 0x287, NVME_SC_DNR = 0x4000, - - - /* - * FC Transport-specific error status values for NVME commands - * - * Transport-specific status code values must be in the range 0xB0..0xBF - */ - - /* Generic FC failure - catchall */ - NVME_SC_FC_TRANSPORT_ERROR = 0x00B0, - - /* I/O failure due to FC ABTS'd */ - NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1, }; struct nvme_completion { -- cgit From d85cf207499e6740ab9c490ff4f360af5c432d23 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 7 Sep 2017 13:20:23 -0700 Subject: nvme: add transport SGL definitions Add transport SGL defintions from NVMe TP 4008, required for the final NVMe-FC standard. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2440be32be1d..9310ce77d8e1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -471,12 +471,14 @@ enum nvme_opcode { * * @NVME_SGL_FMT_ADDRESS: absolute address of the data block * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block + * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation * request subtype */ enum { NVME_SGL_FMT_ADDRESS = 0x00, NVME_SGL_FMT_OFFSET = 0x01, + NVME_SGL_FMT_TRANSPORT_A = 0x0A, NVME_SGL_FMT_INVALIDATE = 0x0f, }; @@ -490,12 +492,16 @@ enum { * * For struct nvme_keyed_sgl_desc: * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor + * + * Transport-specific SGL types: + * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data dlock descriptor */ enum { NVME_SGL_FMT_DATA_DESC = 0x00, NVME_SGL_FMT_SEG_DESC = 0x02, NVME_SGL_FMT_LAST_SEG_DESC = 0x03, NVME_KEY_SGL_FMT_DATA_DESC = 0x04, + NVME_TRANSPORT_SGL_DATA_DESC = 0x05, }; struct nvme_sgl_desc { -- cgit From 78b1beb0998437107ed144b341fbe1252188916b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Sep 2017 21:46:29 +0300 Subject: IB/core: Fix typo in the name of the tag-matching cap struct The tag matching functionality is implemented by mlx5 driver by extending XRQ, however this internal kernel information was exposed to user space applications with *xrq* name instead of *tm*. This patch renames *xrq* to *tm* to handle that. Fixes: 8d50505ada72 ("IB/uverbs: Expose XRQ capabilities") Signed-off-by: Leon Romanovsky Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 4 ++-- include/uapi/rdma/ib_user_verbs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bdb1279a415b..bbb5f54db882 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -285,7 +285,7 @@ enum ib_tm_cap_flags { IB_TM_CAP_RC = 1 << 0, }; -struct ib_xrq_caps { +struct ib_tm_caps { /* Max size of RNDV header */ u32 max_rndv_hdr_size; /* Max number of entries in tag matching list */ @@ -358,7 +358,7 @@ struct ib_device_attr { struct ib_rss_caps rss_caps; u32 max_wq_type_rq; u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ - struct ib_xrq_caps xrq_caps; + struct ib_tm_caps tm_caps; }; enum ib_mtu { diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 9a0b6479fe0c..d4e0b53bfc75 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; - struct ib_uverbs_tm_caps xrq_caps; + struct ib_uverbs_tm_caps tm_caps; }; struct ib_uverbs_query_port { -- cgit From edd31551148c09608feee6b8756ad148d550ee3b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 24 Sep 2017 21:46:31 +0300 Subject: IB: Correct MR length field to be 64-bit The ib_mr->length represents the length of the MR in bytes as per the IBTA spec 1.3 section 11.2.10.3 (REGISTER PHYSICAL MEMORY REGION). Currently ib_mr->length field is defined as only 32-bits field. This might result into truncation and failed WRs of consumers who registers more than 4GB bytes memory regions and whose WRs accessing such MRs. This patch makes the length 64-bit to avoid such truncation. Cc: Sagi Grimberg Cc: Chuck Lever Cc: Faisal Latif Fixes: 4c67e2bfc8b7 ("IB/core: Introduce new fast registration API") Signed-off-by: Ilya Lesokhin Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bbb5f54db882..e8608b2dc844 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1739,7 +1739,7 @@ struct ib_mr { u32 lkey; u32 rkey; u64 iova; - u32 length; + u64 length; unsigned int page_size; bool need_inval; union { -- cgit From fe59493240169a2cc3f445ae5f2a2308fda06b63 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Sep 2017 14:29:15 +0200 Subject: PCI: Add dummy pci_acs_enabled() for CONFIG_PCI=n build If CONFIG_PCI=n and gcc (e.g. 4.1.2) decides not to inline get_pci_function_alias_group(), the build fails with: drivers/iommu/iommu.o: In function `get_pci_function_alias_group': iommu.c:(.text+0xfdc): undefined reference to `pci_acs_enabled' Due to the various dummies for PCI calls in the CONFIG_PCI=n case, pci_acs_enabled() never called, but not all versions of gcc are smart enough to realize that. While explicitly marking get_pci_function_alias_group() inline would fix the build, this would inflate the code for the CONFIG_PCI=y case, as get_pci_function_alias_group() is a not-so-small function called from two places. Hence fix the issue by introducing a dummy for pci_acs_enabled() instead. Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Geert Uytterhoeven Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index f68c58a93dd0..f4f8ee5a7362 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1685,6 +1685,8 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) +static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) +{ return false; } #endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ -- cgit From 6b71f9e1e849f82abb4a8d54ce7f4b1c71f19ac4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 Sep 2017 11:07:26 -0700 Subject: nvmet-fc: sync header templates with comments Comments were incorrect: - defer_rcv was in host port template. moved to target port template - Added Mandatory statements for target port template items Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 9c5cb4480806..a726f96010d5 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -346,11 +346,6 @@ struct nvme_fc_remote_port { * indicating an FC transport Aborted status. * Entrypoint is Mandatory. * - * @defer_rcv: Called by the transport to signal the LLLD that it has - * begun processing of a previously received NVME CMD IU. The LLDD - * is now free to re-use the rcv buffer associated with the - * nvmefc_tgt_fcp_req. - * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -806,11 +801,19 @@ struct nvmet_fc_target_port { * outstanding operation (if there was one) to complete, then will * call the fcp_req_release() callback to return the command's * exchange context back to the LLDD. + * Entrypoint is Mandatory. * * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req * to the LLDD after all operations on the fcp operation are complete. * This may be due to the command completing or upon completion of * abort cleanup. + * Entrypoint is Mandatory. + * + * @defer_rcv: Called by the transport to signal the LLLD that it has + * begun processing of a previously received NVME CMD IU. The LLDD + * is now free to re-use the rcv buffer associated with the + * nvmefc_tgt_fcp_req. + * Entrypoint is Optional. * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. -- cgit From fac1c2040203363eab6c6e86ce883cb71390418f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Sep 2017 19:00:15 +0200 Subject: smp/hotplug: Add state diagram Add a state diagram to clarify when which states are ran where. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: bigeasy@linutronix.de Cc: efault@gmx.de Cc: rostedt@goodmis.org Cc: max.byungchul.park@gmail.com Link: https://lkml.kernel.org/r/20170920170546.661598270@infradead.org --- include/linux/cpuhotplug.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f24bfb2b9a2d..477b2e6f60f7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -3,6 +3,24 @@ #include +/* + * CPU-up CPU-down + * + * BP AP BP AP + * + * OFFLINE OFFLINE + * | ^ + * v | + * BRINGUP_CPU->AP_OFFLINE BRINGUP_CPU <- AP_IDLE_DEAD (idle thread/play_dead) + * | AP_OFFLINE + * v (IRQ-off) ,---------------^ + * AP_ONLNE | (stop_machine) + * | TEARDOWN_CPU <- AP_ONLINE_IDLE + * | ^ + * v | + * AP_ACTIVE AP_ACTIVE + */ + enum cpuhp_state { CPUHP_OFFLINE, CPUHP_CREATE_THREADS, -- cgit From 1db49484f21ed0fcdadd0635a3669f5f386546fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Sep 2017 19:00:21 +0200 Subject: smp/hotplug: Hotplug state fail injection Add a sysfs file to one-time fail a specific state. This can be used to test the state rollback code paths. Something like this (hotplug-up.sh): #!/bin/bash echo 0 > /debug/sched_debug echo 1 > /debug/tracing/events/cpuhp/enable ALL_STATES=`cat /sys/devices/system/cpu/hotplug/states | cut -d':' -f1` STATES=${1:-$ALL_STATES} for state in $STATES do echo 0 > /sys/devices/system/cpu/cpu1/online echo 0 > /debug/tracing/trace echo Fail state: $state echo $state > /sys/devices/system/cpu/cpu1/hotplug/fail cat /sys/devices/system/cpu/cpu1/hotplug/fail echo 1 > /sys/devices/system/cpu/cpu1/online cat /debug/tracing/trace > hotfail-${state}.trace sleep 1 done Can be used to test for all possible rollback (barring multi-instance) scenarios on CPU-up, CPU-down is a trivial modification of the above. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: bigeasy@linutronix.de Cc: efault@gmx.de Cc: rostedt@goodmis.org Cc: max.byungchul.park@gmail.com Link: https://lkml.kernel.org/r/20170920170546.972581715@infradead.org --- include/linux/cpuhotplug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 477b2e6f60f7..6d508767e144 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -22,7 +22,8 @@ */ enum cpuhp_state { - CPUHP_OFFLINE, + CPUHP_INVALID = -1, + CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, -- cgit From 50ce6312f293e129eedf2affc7bd791c71d8287e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Tue, 26 Sep 2017 19:32:52 +0100 Subject: iommu: Fix comment for iommu_ops.map_sg The definition of map_sg was split during a recent addition to iommu_ops. Put it back together. Fixes: add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing") Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a7f2ac689d29..41b8c5757859 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -167,11 +167,11 @@ struct iommu_resv_region { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @map_sg: map a scatter-gather list of physically contiguous memory chunks + * to an iommu domain * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain * @tlb_range_add: Add a given iova range to the flush queue for this domain * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue - * to an iommu domain * @iova_to_phys: translate iova to physical address * @add_device: add device to iommu grouping * @remove_device: remove device from iommu grouping -- cgit From 686fef928bba6be13cabe639f154af7d72b63120 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 28 Sep 2017 06:38:17 -0700 Subject: timer: Prepare to change timer callback argument type Modern kernel callback systems pass the structure associated with a given callback to the callback function. The timer callback remains one of the legacy cases where an arbitrary unsigned long argument continues to be passed as the callback argument. This has several problems: - This bloats the timer_list structure with a normally redundant .data field. - No type checking is being performed, forcing callbacks to do explicit type casts of the unsigned long argument into the object that was passed, rather than using container_of(), as done in most of the other callback infrastructure. - Neighboring buffer overflows can overwrite both the .function and the .data field, providing attackers with a way to elevate from a buffer overflow into a simplistic ROP-like mechanism that allows calling arbitrary functions with a controlled first argument. - For future Control Flow Integrity work, this creates a unique function prototype for timer callbacks, instead of allowing them to continue to be clustered with other void functions that take a single unsigned long argument. This adds a new timer initialization API, which will ultimately replace the existing setup_timer(), setup_{deferrable,pinned,etc}_timer() family, named timer_setup() (to mirror hrtimer_setup(), making instances of its use much easier to grep for). In order to support the migration of existing timers into the new callback arguments, timer_setup() casts its arguments to the existing legacy types, and explicitly passes the timer pointer as the legacy data argument. Once all setup_*timer() callers have been replaced with timer_setup(), the casts can be removed, and the data argument can be dropped with the timer expiration code changed to just pass the timer to the callback directly. Since the regular pattern of using container_of() during local variable declaration repeats the need for the variable type declaration to be included, this adds a helper modeled after other from_*() helpers that wrap container_of(), named from_timer(). This helper uses typeof(*variable), removing the type redundancy and minimizing the need for line wraps in forthcoming conversions from "unsigned data long" to "struct timer_list *" in the timer callbacks: -void callback(unsigned long data) +void callback(struct timer_list *t) { - struct some_data_structure *local = (struct some_data_structure *)data; + struct some_data_structure *local = from_timer(local, t, timer); Finally, in order to support the handful of timer users that perform open-coded assignments of the .function (and .data) fields, provide cast macros (TIMER_FUNC_TYPE and TIMER_DATA_TYPE) that can be used temporarily. Once conversion has been completed, these can be globally trivially removed. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20170928133817.GA113410@beast --- include/linux/timer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index e6789b8757d5..6383c528b148 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -168,6 +168,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) +#define TIMER_DATA_TYPE unsigned long +#define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) + +static inline void timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *), + unsigned int flags) +{ + __setup_timer(timer, (TIMER_FUNC_TYPE)callback, + (TIMER_DATA_TYPE)timer, flags); +} + +#define from_timer(var, callback_timer, timer_fieldname) \ + container_of(callback_timer, typeof(*var), timer_fieldname) + /** * timer_pending - is a timer pending? * @timer: the timer in question -- cgit From 1593baab910da72480d651ea7bf2ce6e3a25a484 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:09:26 +0200 Subject: sched/debug: Implement consistent task-state printing Currently get_task_state() and task_state_to_char() report different states, create a number of common helpers and unify the reported state space. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 92fb8dd5a9e4..163a0b738908 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1243,17 +1243,29 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) return task_pgrp_nr_ns(tsk, &init_pid_ns); } -static inline char task_state_to_char(struct task_struct *task) +static inline unsigned int __get_task_state(struct task_struct *tsk) { - const char stat_nam[] = TASK_STATE_TO_CHAR_STR; - unsigned long state = task->state; + unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; - state = state ? __ffs(state) + 1 : 0; + if (tsk_state == TASK_PARKED) + state = TASK_INTERRUPTIBLE; - /* Make sure the string lines up properly with the number of task states: */ - BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + return fls(state); +} + +static inline char __task_state_to_char(unsigned int state) +{ + static const char state_char[] = "RSDTtXZ"; + + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2); - return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'; + return state_char[state]; +} + +static inline char task_state_to_char(struct task_struct *tsk) +{ + return __task_state_to_char(__get_task_state(tsk)); } /** -- cgit From 92c4bc9f9cd92a8581e36bc5105f03b569f37e36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:13:36 +0200 Subject: sched/debug: Convert TASK_state to hex Bit patterns are easier in hex. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 163a0b738908..69bed5339ffa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -65,23 +65,23 @@ struct task_group; */ /* Used in tsk->state: */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 +#define TASK_RUNNING 0x0000 +#define TASK_INTERRUPTIBLE 0x0001 +#define TASK_UNINTERRUPTIBLE 0x0002 +#define __TASK_STOPPED 0x0004 +#define __TASK_TRACED 0x0008 /* Used in tsk->exit_state: */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 +#define EXIT_DEAD 0x0010 +#define EXIT_ZOMBIE 0x0020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->state again: */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 +#define TASK_DEAD 0x0040 +#define TASK_WAKEKILL 0x0080 +#define TASK_WAKING 0x0100 +#define TASK_PARKED 0x0200 +#define TASK_NOLOAD 0x0400 +#define TASK_NEW 0x0800 +#define TASK_STATE_MAX 0x1000 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" -- cgit From efb40f588b4370ffaeffafbd50f6ff213d954254 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:19:53 +0200 Subject: sched/tracing: Fix trace_sched_switch task-state printing Convert trace_sched_switch to use the common task-state helpers and fix the "X" and "Z" order, possibly they ended up in the wrong order because TASK_REPORT has them in the wrong order too. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- include/trace/events/sched.h | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 69bed5339ffa..a2fe636b6825 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -99,7 +99,7 @@ struct task_group; /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE) #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index ae1409ffe99a..c63e20c9ef24 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -114,7 +114,10 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct * * Preemption ignores task state, therefore preempted tasks are always * RUNNING (we will not have dequeued if state != RUNNING). */ - return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state; + if (preempt) + return TASK_STATE_MAX; + + return __get_task_state(p); } #endif /* CREATE_TRACE_POINTS */ @@ -152,12 +155,13 @@ TRACE_EVENT(sched_switch, TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->prev_state & (TASK_STATE_MAX-1) ? - __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", - { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, - { 16, "Z" }, { 32, "X" }, { 64, "x" }, - { 128, "K" }, { 256, "W" }, { 512, "P" }, - { 1024, "N" }) : "R", + + (__entry->prev_state & TASK_REPORT) ? + __print_flags(__entry->prev_state & TASK_REPORT, "|", + { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, + { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }) : + "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- cgit From 5f6ad26ea353fdf3dad2328052cbee49e0b9c5b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:23:31 +0200 Subject: sched/tracing: Use common task-state helpers Remove yet another task-state char instance. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index a2fe636b6825..bc7807933415 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -83,8 +83,6 @@ struct task_group; #define TASK_NEW 0x0800 #define TASK_STATE_MAX 0x1000 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -- cgit From 06eb61844d841d0032a9950ce7f8e783ee49c0d0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:30:40 +0200 Subject: sched/debug: Add explicit TASK_IDLE printing Markus reported that kthreads that idle using TASK_IDLE instead of TASK_INTERRUPTIBLE are reported in as TASK_UNINTERRUPTIBLE and things like htop mark those red. This is undesirable, so add an explicit state for TASK_IDLE. Reported-by: Markus Trippelsdorf Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++++++++-- include/trace/events/sched.h | 7 ++++--- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index bc7807933415..286fc1117046 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1241,22 +1241,30 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) return task_pgrp_nr_ns(tsk, &init_pid_ns); } +#define TASK_REPORT_IDLE (TASK_REPORT + 1) +#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) + static inline unsigned int __get_task_state(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); + if (tsk_state == TASK_PARKED) state = TASK_INTERRUPTIBLE; + if (tsk_state == TASK_IDLE) + state = TASK_REPORT_IDLE; + return fls(state); } static inline char __task_state_to_char(unsigned int state) { - static const char state_char[] = "RSDTtXZ"; + static const char state_char[] = "RSDTtXZI"; - BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != sizeof(state_char) - 2); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); return state_char[state]; } diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index c63e20c9ef24..b371ef8206e1 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -156,10 +156,11 @@ TRACE_EVENT(sched_switch, TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - (__entry->prev_state & TASK_REPORT) ? - __print_flags(__entry->prev_state & TASK_REPORT, "|", + (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? + __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, - { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }) : + { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, + { 0x40, "I" }) : "R", __entry->prev_state & TASK_STATE_MAX ? "+" : "", -- cgit From 8ef9925b02c23e3838d5e593c5cf37984141150f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Sep 2017 18:37:28 +0200 Subject: sched/debug: Add explicit TASK_PARKED printing Currently TASK_PARKED is masqueraded as TASK_INTERRUPTIBLE, give it its own print state because it will not in fact get woken by regular wakeups and is a long-term state. This requires moving TASK_PARKED into the TASK_REPORT mask, and since that latter needs to be a contiguous bitmask, we need to shuffle the bits around a bit. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 +++++++--------- include/trace/events/sched.h | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 286fc1117046..26a7df4e558c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -75,10 +75,10 @@ struct task_group; #define EXIT_ZOMBIE 0x0020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->state again: */ -#define TASK_DEAD 0x0040 -#define TASK_WAKEKILL 0x0080 -#define TASK_WAKING 0x0100 -#define TASK_PARKED 0x0200 +#define TASK_PARKED 0x0040 +#define TASK_DEAD 0x0080 +#define TASK_WAKEKILL 0x0100 +#define TASK_WAKING 0x0200 #define TASK_NOLOAD 0x0400 #define TASK_NEW 0x0800 #define TASK_STATE_MAX 0x1000 @@ -97,7 +97,8 @@ struct task_group; /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE) + __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ + TASK_PARKED) #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) @@ -1251,9 +1252,6 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); - if (tsk_state == TASK_PARKED) - state = TASK_INTERRUPTIBLE; - if (tsk_state == TASK_IDLE) state = TASK_REPORT_IDLE; @@ -1262,7 +1260,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) static inline char __task_state_to_char(unsigned int state) { - static const char state_char[] = "RSDTtXZI"; + static const char state_char[] = "RSDTtXZPI"; BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b371ef8206e1..3c8b7f625670 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -160,7 +160,7 @@ TRACE_EVENT(sched_switch, __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, - { 0x40, "I" }) : + { 0x40, "P" }, { 0x80, "I" }) : "R", __entry->prev_state & TASK_STATE_MAX ? "+" : "", -- cgit