From 3f1b623a1be92103386bcab818e25885d6be9419 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 23 Oct 2020 17:00:41 +0800 Subject: vdpa: introduce config op to get valid iova range This patch introduce a config op to get valid iova range from the vDPA device. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20201023090043.14430-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index eae0bfd87d91..30bc7a7223bb 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -52,6 +52,16 @@ struct vdpa_device { int nvqs; }; +/** + * vDPA IOVA range - the IOVA range support by the device + * @first: start of the IOVA range + * @last: end of the IOVA range + */ +struct vdpa_iova_range { + u64 first; + u64 last; +}; + /** * vDPA_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the @@ -151,6 +161,10 @@ struct vdpa_device { * @get_generation: Get device config generation (optional) * @vdev: vdpa device * Returns u32: device generation + * @get_iova_range: Get supported iova range (optional) + * @vdev: vdpa device + * Returns the iova range supported by + * the device. * @set_map: Set device memory mapping (optional) * Needed for device that using device * specific DMA translation (on-chip IOMMU) @@ -216,6 +230,7 @@ struct vdpa_config_ops { void (*set_config)(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int len); u32 (*get_generation)(struct vdpa_device *vdev); + struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev); /* DMA ops */ int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); -- cgit From 1b48dc03e575a872404f33b04cd237953c5d7498 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 23 Oct 2020 17:00:42 +0800 Subject: vhost: vdpa: report iova range This patch introduces a new ioctl for vhost-vdpa device that can report the iova range by the device. For device that implements get_iova_range() method, we fetch it from the vDPA device. If device doesn't implement get_iova_range() but depends on platform IOMMU, we will query via DOMAIN_ATTR_GEOMETRY, otherwise [0, ULLONG_MAX] is assumed. For safety, this patch also rules out the map request which is not in the valid range. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20201023090043.14430-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 4 ++++ include/uapi/linux/vhost_types.h | 9 +++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 9a269a88a6ff..f7f6a3a28977 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -138,6 +138,15 @@ struct vhost_vdpa_config { __u8 buf[0]; }; +/* vhost vdpa IOVA range + * @first: First address that can be mapped by vhost-vDPA + * @last: Last address that can be mapped by vhost-vDPA + */ +struct vhost_vdpa_iova_range { + __u64 first; + __u64 last; +}; + /* Feature bits */ /* Log all write descriptors. Can be changed while device is active. */ #define VHOST_F_LOG_ALL 26 -- cgit From e275d2109cdaea8b4554b9eb8a828bdb8f8ba068 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: bus: ti-sysc: Fix reset status check for modules with quirks Commit d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") started showing a "OCP softreset timed out" warning on enable if the interconnect target module is not out of reset. This caused the warning to be often triggered for i2c and hdq while the devices are working properly. Turns out that some interconnect target modules seem to have an unusable reset status bits unless the module specific reset quirks are activated. Let's just skip the reset status check for those modules as we only want to activate the reset quirks when doing a reset, and not on enable. This way we don't see the bogus "OCP softreset timed out" warnings during boot. Fixes: d46f9fbec719 ("bus: ti-sysc: Use optional clocks on for enable and wait for softreset bit") Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index c59999ce044e..240dce553a0b 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) #define SYSC_MODULE_QUIRK_PRUSS BIT(24) #define SYSC_MODULE_QUIRK_DSS_RESET BIT(23) #define SYSC_MODULE_QUIRK_RTC_UNLOCK BIT(22) -- cgit From 58b24a38f0deac253ba9c5be128e3da6a86041ad Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 29 Jul 2019 14:26:15 +0200 Subject: gpu: ipu-v3: remove unused functions ipu_mbus_code_to_colorspace, ipu_stride_to_bytes, and ipu_pixelformat_is_planar are unused. Remove them. Signed-off-by: Philipp Zabel Reviewed-by: Sam Ravnborg --- include/video/imx-ipu-v3.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 06b0b57e996c..d1b3889f74d8 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -484,9 +484,6 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); -enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); -int ipu_stride_to_bytes(u32 pixel_stride, u32 pixelformat); -bool ipu_pixelformat_is_planar(u32 pixelformat); int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, bool hflip, bool vflip); int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, -- cgit From cb47755725da7b90fecbb2aa82ac3b24a7adb89b Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Tue, 1 Sep 2020 17:30:13 +0800 Subject: time: Prevent undefined behaviour in timespec64_to_ns() UBSAN reports: Undefined behaviour in ./include/linux/time64.h:127:27 signed integer overflow: 17179869187 * 1000000000 cannot be represented in type 'long long int' Call Trace: timespec64_to_ns include/linux/time64.h:127 [inline] set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180 do_setitimer+0x8e/0x740 kernel/time/itimer.c:245 __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336 do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295 Commit bd40a175769d ("y2038: itimer: change implementation to timespec64") replaced the original conversion which handled time clamping correctly with timespec64_to_ns() which has no overflow protection. Fix it in timespec64_to_ns() as this is not necessarily limited to the usage in itimers. [ tglx: Added comment and adjusted the fixes tag ] Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64") Signed-off-by: Zeng Tao Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com --- include/linux/time64.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/time64.h b/include/linux/time64.h index c9dcb3e5781f..5117cb5b5656 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -124,6 +124,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { + /* Prevent multiplication overflow */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } -- cgit From f7b6603c666798a1f8379e692d11d500885f32d8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:35 +0200 Subject: ALSA: fix kernel-doc markups Kernel-doc markups should use this format: identifier - description There is a common comment marked, instead, with kernel-doc notation. Some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Acked-by: Mark Brown Link: https://lore.kernel.org/r/535182d6f55d7a7de293dda9676df68f5f60afc6.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Takashi Iwai --- include/sound/core.h | 3 ++- include/sound/pcm.h | 4 ++-- include/uapi/sound/compress_offload.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 381a010a1bd4..0462c577d7a3 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -332,7 +332,8 @@ void __snd_printk(unsigned int level, const char *file, int line, #define snd_BUG() WARN(1, "BUG?\n") /** - * Suppress high rates of output when CONFIG_SND_DEBUG is enabled. + * snd_printd_ratelimit - Suppress high rates of output when + * CONFIG_SND_DEBUG is enabled. */ #define snd_printd_ratelimit() printk_ratelimit() diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 2ba5df2c9e23..2336bf9243e1 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1284,8 +1284,8 @@ snd_pcm_sgbuf_get_ptr(struct snd_pcm_substream *substream, unsigned int ofs) } /** - * snd_pcm_sgbuf_chunk_size - Compute the max size that fits within the contig. - * page from the given size + * snd_pcm_sgbuf_get_chunk_size - Compute the max size that fits within the + * contig. page from the given size * @substream: PCM substream * @ofs: byte offset * @size: byte size to examine diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 7184265c0b0d..9555f31c8425 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -144,7 +144,7 @@ struct snd_compr_metadata { __u32 value[8]; } __attribute__((packed, aligned(4))); -/** +/* * compress path ioctl definitions * SNDRV_COMPRESS_GET_CAPS: Query capability of DSP * SNDRV_COMPRESS_GET_CODEC_CAPS: Query capability of a codec -- cgit From 7f32b10c6b461a369b9741623cd3f722134066f0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 21 Oct 2020 14:17:26 +0200 Subject: kunit: test: fix remaining kernel-doc warnings test.h still produce three warnings: include/kunit/test.h:282: warning: Function parameter or member '__suites' not described in 'kunit_test_suites_for_module' include/kunit/test.h:282: warning: Excess function parameter 'suites_list' description in 'kunit_test_suites_for_module' include/kunit/test.h:314: warning: Excess function parameter 'suites' description in 'kunit_test_suites' They're all due to errors at kernel-doc markups. Update them. It should be noticed that this patch moved a kernel-doc markup that were located at the wrong place, and using a wrong name. Kernel-doc only supports kaving the markup just before the function/macro declaration. Placing it elsewhere will make it do wrong assumptions. Fixes: aac35468ca20 ("kunit: test: create a single centralized executor for all tests") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index 9197da792336..db1b0ae666c4 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -252,13 +252,14 @@ static inline int kunit_run_all_tests(void) } #endif /* IS_BUILTIN(CONFIG_KUNIT) */ +#ifdef MODULE /** - * kunit_test_suites() - used to register one or more &struct kunit_suite - * with KUnit. + * kunit_test_suites_for_module() - used to register one or more + * &struct kunit_suite with KUnit. * - * @suites_list...: a statically allocated list of &struct kunit_suite. + * @__suites: a statically allocated list of &struct kunit_suite. * - * Registers @suites_list with the test framework. See &struct kunit_suite for + * Registers @__suites with the test framework. See &struct kunit_suite for * more information. * * If a test suite is built-in, module_init() gets translated into @@ -267,7 +268,6 @@ static inline int kunit_run_all_tests(void) * module_{init|exit} functions for the builtin case when registering * suites via kunit_test_suites() below. */ -#ifdef MODULE #define kunit_test_suites_for_module(__suites) \ static int __init kunit_test_suites_init(void) \ { \ @@ -294,7 +294,7 @@ static inline int kunit_run_all_tests(void) * kunit_test_suites() - used to register one or more &struct kunit_suite * with KUnit. * - * @suites: a statically allocated list of &struct kunit_suite. + * @__suites: a statically allocated list of &struct kunit_suite. * * Registers @suites with the test framework. See &struct kunit_suite for * more information. @@ -308,10 +308,10 @@ static inline int kunit_run_all_tests(void) * module. * */ -#define kunit_test_suites(...) \ +#define kunit_test_suites(__suites...) \ __kunit_test_suites(__UNIQUE_ID(array), \ __UNIQUE_ID(suites), \ - __VA_ARGS__) + ##__suites) #define kunit_test_suite(suite) kunit_test_suites(&suite) -- cgit From 2a6eca16f376f6b83aaf73c57f0b6547907a5ed3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:52:18 +0100 Subject: ALSA: make snd_kcontrol_new name a normal string When building with W=2, there are lots of warnings about the snd_kcontrol_new name field being an array of 'unsigned char' but initialized to a string: include/sound/soc.h:93:48: warning: pointer targets in initialization of 'const unsigned char *' from 'char *' differ in signedness [-Wpointer-sign] Make it a regular 'char *' to avoid flooding the build log with this. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201026165715.3723704-1-arnd@kernel.org Signed-off-by: Takashi Iwai --- include/sound/control.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/control.h b/include/sound/control.h index e128cff10dfa..77d9fa10812d 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -42,7 +42,7 @@ struct snd_kcontrol_new { snd_ctl_elem_iface_t iface; /* interface identifier */ unsigned int device; /* device/client number */ unsigned int subdevice; /* subdevice (substream) number */ - const unsigned char *name; /* ASCII name of item */ + const char *name; /* ASCII name of item */ unsigned int index; /* index of item */ unsigned int access; /* access rights */ unsigned int count; /* count of same elements */ -- cgit From fbdd0049d98d44914fc57d4b91f867f4996c787b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 26 Oct 2020 15:43:59 +0200 Subject: RDMA/mlx5: Fix devlink deadlock on net namespace deletion When a mlx5 core devlink instance is reloaded in different net namespace, its associated IB device is deleted and recreated. Example sequence is: $ ip netns add foo $ devlink dev reload pci/0000:00:08.0 netns foo $ ip netns del foo mlx5 IB device needs to attach and detach the netdevice to it through the netdev notifier chain during load and unload sequence. A below call graph of the unload flow. cleanup_net() down_read(&pernet_ops_rwsem); <- first sem acquired ops_pre_exit_list() pre_exit() devlink_pernet_pre_exit() devlink_reload() mlx5_devlink_reload_down() mlx5_unload_one() [...] mlx5_ib_remove() mlx5_ib_unbind_slave_port() mlx5_remove_netdev_notifier() unregister_netdevice_notifier() down_write(&pernet_ops_rwsem);<- recurrsive lock Hence, when net namespace is deleted, mlx5 reload results in deadlock. When deadlock occurs, devlink mutex is also held. This not only deadlocks the mlx5 device under reload, but all the processes which attempt to access unrelated devlink devices are deadlocked. Hence, fix this by mlx5 ib driver to register for per net netdev notifier instead of global one, which operats on the net namespace without holding the pernet_ops_rwsem. Fixes: 4383cfcc65e7 ("net/mlx5: Add devlink reload") Link: https://lore.kernel.org/r/20201026134359.23150-1-parav@nvidia.com Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index add85094f9a5..0f23e1ed5e71 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1213,4 +1213,22 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) return val.vbool; } +/** + * mlx5_core_net - Provide net namespace of the mlx5_core_dev + * @dev: mlx5 core device + * + * mlx5_core_net() returns the net namespace of mlx5 core device. + * This can be called only in below described limited context. + * (a) When a devlink instance for mlx5_core is registered and + * when devlink reload operation is disabled. + * or + * (b) during devlink reload reload_down() and reload_up callbacks + * where it is ensured that devlink instance's net namespace is + * stable. + */ +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif /* MLX5_DRIVER_H */ -- cgit From 29813a2297910d5c4be08c7b390054f23dd794a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 16:53:48 +0100 Subject: asm-generic: percpu: avoid Wshadow warning Nesting macros that use the same local variable names causes warnings when building with "make W=2": include/asm-generic/percpu.h:117:14: warning: declaration of '__ret' shadows a previous local [-Wshadow] include/asm-generic/percpu.h:126:14: warning: declaration of '__ret' shadows a previous local [-Wshadow] These are fairly harmless, but since the warning comes from a global header, the warning happens every time the headers are included, which is fairly annoying. Rename the variables to avoid shadowing and shut up the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Luc Van Oostenryck Signed-off-by: Dennis Zhou --- include/asm-generic/percpu.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 35e4a53b83e6..6432a7fade91 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -114,21 +114,21 @@ do { \ #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ - typeof(pcp) __ret; \ + typeof(pcp) ___ret; \ preempt_disable_notrace(); \ - __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ + ___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ - __ret; \ + ___ret; \ }) #define __this_cpu_generic_read_noirq(pcp) \ ({ \ - typeof(pcp) __ret; \ - unsigned long __flags; \ - raw_local_irq_save(__flags); \ - __ret = raw_cpu_generic_read(pcp); \ - raw_local_irq_restore(__flags); \ - __ret; \ + typeof(pcp) ___ret; \ + unsigned long ___flags; \ + raw_local_irq_save(___flags); \ + ___ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(___flags); \ + ___ret; \ }) #define this_cpu_generic_read(pcp) \ -- cgit From 08989335e2b6b549ab20dd41ba2f9ca9782f3cd8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:28 +0100 Subject: drm: drm_edid: remove a duplicated kernel-doc declaration It is not possible to create cross-references for duplicated symbols. While Sphinx always detected it, on Sphinx 3 it generates warnings like this: .../Documentation/gpu/drm-kms-helpers:326: ../drivers/gpu/drm/drm_edid.c:1626: WARNING: Duplicate C declaration, also defined in 'gpu/drm-kms-helpers'. Declaration is 'bool drm_edid_are_equal (const struct edid *edid1, const struct edid *edid2)'. So, get rid of the duplicated kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/9310f4074fa9d29cd3ad60684d86d0ace8dab7ae.1603791716.git.mchehab+huawei@kernel.org --- include/drm/drm_edid.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b27a0e2169c8..e97daf6ffbb1 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -359,13 +359,6 @@ drm_load_edid_firmware(struct drm_connector *connector) } #endif -/** - * drm_edid_are_equal - compare two edid blobs. - * @edid1: pointer to first blob - * @edid2: pointer to second blob - * This helper can be used during probing to determine if - * edid had changed. - */ bool drm_edid_are_equal(const struct edid *edid1, const struct edid *edid2); int -- cgit From 38a8b32f467a9389ff413574968baa8777c77355 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:31 +0100 Subject: drm: kernel-doc: drm_dp_helper.h: fix a typo Right now, kernel-doc generates a warning: ./include/drm/drm_dp_helper.h:1786: warning: Function parameter or member 'hbr2_reset' not described in 'drm_dp_phy_test_params' This is due to a typo: @hb2_reset -> @hbr2_reset Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/2a615cb38e951215bb1bddc2481ad323c9cf3fc9.1603791716.git.mchehab+huawei@kernel.org --- include/drm/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index da53aebb7230..a53243abd945 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1836,7 +1836,7 @@ static inline void drm_dp_cec_unset_edid(struct drm_dp_aux *aux) * @link_rate: Requested Link rate from DPCD 0x219 * @num_lanes: Number of lanes requested by sing through DPCD 0x220 * @phy_pattern: DP Phy test pattern from DPCD 0x248 - * @hb2_reset: DP HBR2_COMPLIANCE_SCRAMBLER_RESET from DCPD 0x24A and 0x24B + * @hbr2_reset: DP HBR2_COMPLIANCE_SCRAMBLER_RESET from DCPD 0x24A and 0x24B * @custom80: DP Test_80BIT_CUSTOM_PATTERN from DPCDs 0x250 through 0x259 * @enhanced_frame_cap: flag for enhanced frame capability. */ -- cgit From b52817e9de06a3af4ebefd6d244c9c750903d79c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:35 +0100 Subject: drm: drm_print.h: fix kernel-doc markups A kernel-doc markup should start with the identifier on its first line. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/5b76c5625709aaaa3abee98faa620b9f3d27ff85.1603791716.git.mchehab+huawei@kernel.org --- include/drm/drm_print.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 1c9417430d08..f32d179e139d 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -338,7 +338,7 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, const char *format, ...); /** - * Error output. + * DRM_DEV_ERROR() - Error output. * * @dev: device pointer * @fmt: printf() like format string. @@ -347,10 +347,12 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, drm_dev_printk(dev, KERN_ERR, "*ERROR* " fmt, ##__VA_ARGS__) /** - * Rate limited error output. Like DRM_ERROR() but won't flood the log. + * DRM_DEV_ERROR_RATELIMITED() - Rate limited error output. * * @dev: device pointer * @fmt: printf() like format string. + * + * Like DRM_ERROR() but won't flood the log. */ #define DRM_DEV_ERROR_RATELIMITED(dev, fmt, ...) \ ({ \ @@ -375,15 +377,27 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, }) /** - * Debug output. + * DRM_DEV_DEBUG() - Debug output for generic drm code * * @dev: device pointer * @fmt: printf() like format string. */ #define DRM_DEV_DEBUG(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_CORE, fmt, ##__VA_ARGS__) +/** + * DRM_DEV_DEBUG_DRIVER() - Debug output for vendor specific part of the driver + * + * @dev: device pointer + * @fmt: printf() like format string. + */ #define DRM_DEV_DEBUG_DRIVER(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_DRIVER, fmt, ##__VA_ARGS__) +/** + * DRM_DEV_DEBUG_KMS() - Debug output for modesetting code + * + * @dev: device pointer + * @fmt: printf() like format string. + */ #define DRM_DEV_DEBUG_KMS(dev, fmt, ...) \ drm_dev_dbg(dev, DRM_UT_KMS, fmt, ##__VA_ARGS__) -- cgit From 0bcd0a2be8c9ef39d84d167ff85359a49f7be175 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Oct 2020 09:50:17 +0100 Subject: asm-generic: mark __{get,put}_user_fn as __always_inline Without the explicit __always_inline, some RISC-V configs place the functions out of line, triggering the BUILD_BUG_ON checks in the function. Fixes: 11129e8ed4d9 ("riscv: use memcpy based uaccess for nommu again") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann --- include/asm-generic/uaccess.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 45f9872fd747..4973328f3c6e 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -12,7 +12,8 @@ #ifdef CONFIG_UACCESS_MEMCPY #include -static inline int __get_user_fn(size_t size, const void __user *from, void *to) +static __always_inline int +__get_user_fn(size_t size, const void __user *from, void *to) { BUILD_BUG_ON(!__builtin_constant_p(size)); @@ -37,7 +38,8 @@ static inline int __get_user_fn(size_t size, const void __user *from, void *to) } #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k) -static inline int __put_user_fn(size_t size, void __user *to, void *from) +static __always_inline int +__put_user_fn(size_t size, void __user *to, void *from) { BUILD_BUG_ON(!__builtin_constant_p(size)); -- cgit From 343a3e8bc635bd4c58d45a4fe67f9c3a78fbd191 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:20:50 +0100 Subject: bpf: Fix -Wshadow warnings There are thousands of warnings about one macro in a W=2 build: include/linux/filter.h:561:6: warning: declaration of 'ret' shadows a previous local [-Wshadow] Prefix all the locals in that macro with __ to avoid most of these warnings. Fixes: 492ecee892c2 ("bpf: enable program stats") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201026162110.3710415-1-arnd@kernel.org --- include/linux/filter.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 72d62cbc1578..1b62397bd124 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -558,21 +558,21 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); #define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \ - u32 ret; \ + u32 __ret; \ cant_migrate(); \ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \ - struct bpf_prog_stats *stats; \ - u64 start = sched_clock(); \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ - stats = this_cpu_ptr(prog->aux->stats); \ - u64_stats_update_begin(&stats->syncp); \ - stats->cnt++; \ - stats->nsecs += sched_clock() - start; \ - u64_stats_update_end(&stats->syncp); \ + struct bpf_prog_stats *__stats; \ + u64 __start = sched_clock(); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __stats = this_cpu_ptr(prog->aux->stats); \ + u64_stats_update_begin(&__stats->syncp); \ + __stats->cnt++; \ + __stats->nsecs += sched_clock() - __start; \ + u64_stats_update_end(&__stats->syncp); \ } else { \ - ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ + __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \ } \ - ret; }) + __ret; }) #define BPF_PROG_RUN(prog, ctx) \ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func) -- cgit From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:19 +0200 Subject: cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag Generally, a cpufreq driver may need to update some internal upper and lower frequency boundaries on policy max and min changes, respectively, but currently this does not work if the target frequency does not change along with the policy limit. Namely, if the target frequency does not change along with the policy min or max, the "target_freq == policy->cur" check in __cpufreq_driver_target() prevents driver callbacks from being invoked and they do not even have a chance to update the corresponding internal boundary. This particularly affects the "powersave" and "performance" governors that always set the target frequency to one of the policy limits and it never changes when the other limit is updated. To allow cpufreq the drivers needing to update internal frequency boundaries on policy limits changes to avoid this issue, introduce a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will neutralize the check mentioned above. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fa37b1c66443..038ed83aab41 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name) struct cpufreq_driver { char name[CPUFREQ_NAME_LEN]; - u8 flags; + u16 flags; void *driver_data; /* needed by all drivers */ @@ -422,6 +422,14 @@ struct cpufreq_driver { */ #define CPUFREQ_IS_COOLING_DEV BIT(7) +/* + * Set by drivers that need to update internale upper and lower boundaries along + * with the target frequency and so the core and governors should also invoke + * the diver if the target frequency does not change, but the policy min or max + * may have changed. + */ +#define CPUFREQ_NEED_UPDATE_LIMITS BIT(8) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit From 3e6631485fae70f474d5bd85cfaf0f113f61ccce Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 4 Oct 2020 19:57:20 -0700 Subject: vmlinux.lds.h: Keep .ctors.* with .ctors Under some circumstances, the compiler generates .ctors.* sections. This is seen doing a cross compile of x86_64 from a powerpc64el host: x86_64-linux-gnu-ld: warning: orphan section `.ctors.65435' from `kernel/trace/trace_clock.o' being placed in section `.ctors.65435' x86_64-linux-gnu-ld: warning: orphan section `.ctors.65435' from `kernel/trace/ftrace.o' being placed in section `.ctors.65435' x86_64-linux-gnu-ld: warning: orphan section `.ctors.65435' from `kernel/trace/ring_buffer.o' being placed in section `.ctors.65435' Include these orphans along with the regular .ctors section. Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell Fixes: 83109d5d5fba ("x86/build: Warn on orphan section placement") Signed-off-by: Kees Cook Acked-by: Nick Desaulniers Link: https://lore.kernel.org/r/20201005025720.2599682-1-keescook@chromium.org --- include/asm-generic/vmlinux.lds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index cd14444bf600..b2b3d81b1535 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -701,6 +701,7 @@ #ifdef CONFIG_CONSTRUCTORS #define KERNEL_CTORS() . = ALIGN(8); \ __ctors_start = .; \ + KEEP(*(SORT(.ctors.*))) \ KEEP(*(.ctors)) \ KEEP(*(SORT(.init_array.*))) \ KEEP(*(.init_array)) \ -- cgit From 1de111b51b829bcf01d2e57971f8fd07a665fa3f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 23 Oct 2020 08:47:50 -0700 Subject: KVM: arm64: ARM_SMCCC_ARCH_WORKAROUND_1 doesn't return SMCCC_RET_NOT_REQUIRED According to the SMCCC spec[1](7.5.2 Discovery) the ARM_SMCCC_ARCH_WORKAROUND_1 function id only returns 0, 1, and SMCCC_RET_NOT_SUPPORTED. 0 is "workaround required and safe to call this function" 1 is "workaround not required but safe to call this function" SMCCC_RET_NOT_SUPPORTED is "might be vulnerable or might not be, who knows, I give up!" SMCCC_RET_NOT_SUPPORTED might as well mean "workaround required, except calling this function may not work because it isn't implemented in some cases". Wonderful. We map this SMC call to 0 is SPECTRE_MITIGATED 1 is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE For KVM hypercalls (hvc), we've implemented this function id to return SMCCC_RET_NOT_SUPPORTED, 0, and SMCCC_RET_NOT_REQUIRED. One of those isn't supposed to be there. Per the code we call arm64_get_spectre_v2_state() to figure out what to return for this feature discovery call. 0 is SPECTRE_MITIGATED SMCCC_RET_NOT_REQUIRED is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE Let's clean this up so that KVM tells the guest this mapping: 0 is SPECTRE_MITIGATED 1 is SPECTRE_UNAFFECTED SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE Note: SMCCC_RET_NOT_AFFECTED is 1 but isn't part of the SMCCC spec Fixes: c118bbb52743 ("arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests") Signed-off-by: Stephen Boyd Acked-by: Marc Zyngier Acked-by: Will Deacon Cc: Andre Przywara Cc: Steven Price Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: https://developer.arm.com/documentation/den0028/latest [1] Link: https://lore.kernel.org/r/20201023154751.1973872-1-swboyd@chromium.org Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 885c9ffc835c..f860645f6512 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -87,6 +87,8 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit From 071ba4cc559de47160761b9500b72e8fa09d923d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 11:25:49 -0300 Subject: RDMA: Add rdma_connect_locked() There are two flows for handling RDMA_CM_EVENT_ROUTE_RESOLVED, either the handler triggers a completion and another thread does rdma_connect() or the handler directly calls rdma_connect(). In all cases rdma_connect() needs to hold the handler_mutex, but when handler's are invoked this is already held by the core code. This causes ULPs using the 2nd method to deadlock. Provide a rdma_connect_locked() and have all ULPs call it from their handlers. Link: https://lore.kernel.org/r/0-v2-53c22d5c1405+33-rdma_connect_locking_jgg@nvidia.com Reported-and-tested-by: Guoqing Jiang Fixes: 2a7cec538169 ("RDMA/cma: Fix locking for the RDMA_CM_CONNECT state") Acked-by: Santosh Shilimkar Acked-by: Jack Wang Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Jason Gunthorpe --- include/rdma/rdma_cm.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c672ae1da26b..32a67af18415 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -227,19 +227,9 @@ void rdma_destroy_qp(struct rdma_cm_id *id); int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); -/** - * rdma_connect - Initiate an active connection request. - * @id: Connection identifier to connect. - * @conn_param: Connection information used for connected QPs. - * - * Users must have resolved a route for the rdma_cm_id to connect with - * by having called rdma_resolve_route before calling this routine. - * - * This call will either connect to a remote QP or obtain remote QP - * information for unconnected rdma_cm_id's. The actual operation is - * based on the rdma_cm_id's port space. - */ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); +int rdma_connect_locked(struct rdma_cm_id *id, + struct rdma_conn_param *conn_param); int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, struct rdma_ucm_ece *ece); -- cgit From cbdc0f54560f94c2205ddbebb5464d65868af0d8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:18 +0200 Subject: usb: fix kernel-doc markups There is a common comment marked, instead, with kernel-doc notation. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Signed-off-by: Mauro Carvalho Chehab Acked-by: Felipe Balbi Link: https://lore.kernel.org/r/0b964be3884def04fcd20ea5c12cb90d0014871c.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 2040696d75b6..a2d229ab63ba 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -437,7 +437,7 @@ static inline struct usb_composite_driver *to_cdriver( #define OS_STRING_IDX 0xEE /** - * struct usb_composite_device - represents one composite usb gadget + * struct usb_composite_dev - represents one composite usb gadget * @gadget: read-only, abstracts the gadget's usb peripheral controller * @req: used for control responses; buffer is pre-allocated * @os_desc_req: used for OS descriptors responses; buffer is pre-allocated -- cgit From 13150bc5416f45234c955e5bed91623d178c6117 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 27 Oct 2020 16:11:32 +0100 Subject: module: use hidden visibility for weak symbol references Geert reports that commit be2881824ae9eb92 ("arm64/build: Assert for unwanted sections") results in build errors on arm64 for configurations that have CONFIG_MODULES disabled. The commit in question added ASSERT()s to the arm64 linker script to ensure that linker generated sections such as .got.plt etc are empty, but as it turns out, there are corner cases where the linker does emit content into those sections. More specifically, weak references to function symbols (which can remain unsatisfied, and can therefore not be emitted as relative references) will be emitted as GOT and PLT entries when linking the kernel in PIE mode (which is the case when CONFIG_RELOCATABLE is enabled, which is on by default). What happens is that code such as struct device *(*fn)(struct device *dev); struct device *iommu_device; fn = symbol_get(mdev_get_iommu_device); if (fn) { iommu_device = fn(dev); essentially gets converted into the following when CONFIG_MODULES is off: struct device *iommu_device; if (&mdev_get_iommu_device) { iommu_device = mdev_get_iommu_device(dev); where mdev_get_iommu_device is emitted as a weak symbol reference into the object file. The first reference is decorated with an ordinary ABS64 data relocation (which yields 0x0 if the reference remains unsatisfied). However, the indirect call is turned into a direct call covered by a R_AARCH64_CALL26 relocation, which is converted into a call via a PLT entry taking the target address from the associated GOT entry. Given that such GOT and PLT entries are unnecessary for fully linked binaries such as the kernel, let's give these weak symbol references hidden visibility, so that the linker knows that the weak reference via R_AARCH64_CALL26 can simply remain unsatisfied. Signed-off-by: Ard Biesheuvel Tested-by: Geert Uytterhoeven Reviewed-by: Fangrui Song Acked-by: Jessica Yu Cc: Jessica Yu Cc: Kees Cook Cc: Geert Uytterhoeven Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20201027151132.14066-1-ardb@kernel.org Signed-off-by: Will Deacon --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 7ccdf87f376f..6264617bab4d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -740,7 +740,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) } /* Get/put a kernel symbol (calls should be symmetric) */ -#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); }) +#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); }) #define symbol_put(x) do { } while (0) #define symbol_put_addr(x) do { } while (0) -- cgit From 5760648e63e6c1006a3ed0bfc2167f623b8bcbcd Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:25 +0800 Subject: gpio: uapi: fix kernel-doc warnings Fix kernel-doc warnings, specifically gpioline_info_changed.padding is not documented and 'GPIO event types' describes defines, which are not documented by kernel-doc. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-2-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 07865c601099..b0d5e7a1c693 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -346,6 +346,7 @@ enum { * @timestamp: estimate of time of status change occurrence, in nanoseconds * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED * and GPIOLINE_CHANGED_CONFIG + * @padding: reserved for future use * * Note: struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can @@ -469,7 +470,7 @@ struct gpioevent_request { int fd; }; -/** +/* * GPIO event types */ #define GPIOEVENT_EVENT_RISING_EDGE 0x01 -- cgit From f20160217537e9006ce4a625da62b358416fc4ed Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:26 +0800 Subject: gpio: uapi: comment consistency Make debounce_period_us field documentation consistent with other fields in the union. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-3-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index b0d5e7a1c693..1fdb0e851f83 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -98,7 +98,7 @@ struct gpio_v2_line_values { * identifying which field of the attribute union is in use. * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use - * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us is in use + * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us field is in use */ enum gpio_v2_line_attr_id { GPIO_V2_LINE_ATTR_ID_FLAGS = 1, -- cgit From 2cc522d3931ba2aa744d09d41f874d61bf3a1851 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:27 +0800 Subject: gpio: uapi: kernel-doc formatting improvements Add kernel-doc formatting to all references to structs, enums, fields and constants, and move deprecation warnings into the Note section of the deprecated struct. Replace 'OR:ed' with 'added', as the former looks odd. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-4-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 93 ++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 1fdb0e851f83..32dd18f238c3 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -110,17 +110,17 @@ enum gpio_v2_line_attr_id { * struct gpio_v2_line_attribute - a configurable attribute of a line * @id: attribute identifier with value from &enum gpio_v2_line_attr_id * @padding: reserved for future use and must be zero filled - * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO - * line, with values from enum gpio_v2_line_flag, such as - * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed + * @flags: if id is %GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO + * line, with values from &enum gpio_v2_line_flag, such as + * %GPIO_V2_LINE_FLAG_ACTIVE_LOW, %GPIO_V2_LINE_FLAG_OUTPUT etc, added * together. This overrides the default flags contained in the &struct * gpio_v2_line_config for the associated line. - * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap + * @values: if id is %GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap * containing the values to which the lines will be set, with each bit * number corresponding to the index into &struct * gpio_v2_line_request.offsets. - * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired - * debounce period, in microseconds + * @debounce_period_us: if id is %GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the + * desired debounce period, in microseconds */ struct gpio_v2_line_attribute { __u32 id; @@ -147,12 +147,12 @@ struct gpio_v2_line_config_attribute { /** * struct gpio_v2_line_config - Configuration for GPIO lines - * @flags: flags for the GPIO lines, with values from enum - * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together. This is the default for + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. This is the default for * all requested lines but may be overridden for particular lines using - * attrs. - * @num_attrs: the number of attributes in attrs + * @attrs. + * @num_attrs: the number of attributes in @attrs * @padding: reserved for future use and must be zero filled * @attrs: the configuration attributes associated with the requested * lines. Any attribute should only be associated with a particular line @@ -175,17 +175,17 @@ struct gpio_v2_line_config { * "my-bitbanged-relay" * @config: requested configuration for the lines. * @num_lines: number of lines requested in this request, i.e. the number - * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to + * of valid fields in the %GPIO_V2_LINES_MAX sized arrays, set to 1 to * request a single line * @event_buffer_size: a suggested minimum number of line events that the * kernel should buffer. This is only relevant if edge detection is * enabled in the configuration. Note that this is only a suggested value * and the kernel may allocate a larger buffer or cap the size of the * buffer. If this field is zero then the buffer size defaults to a minimum - * of num_lines*16. + * of @num_lines * 16. * @padding: reserved for future use and must be zero filled * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINE_IOCTL operation, zero or negative value means + * after a %GPIO_GET_LINE_IOCTL operation, zero or negative value means * error */ struct gpio_v2_line_request { @@ -207,11 +207,12 @@ struct gpio_v2_line_request { * @consumer: a functional name for the consumer of this GPIO line as set * by whatever is using it, will be empty if there is no current user but * may also be empty if the consumer doesn't set this up - * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together * @offset: the local offset on this GPIO chip, fill this in when * requesting the line information from the kernel - * @num_attrs: the number of attributes in attrs + * @num_attrs: the number of attributes in @attrs + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. * @attrs: the configuration attributes associated with the line * @padding: reserved for future use */ @@ -244,7 +245,7 @@ enum gpio_v2_line_changed_type { * of a GPIO line * @info: updated line information * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds - * @event_type: the type of change with a value from enum + * @event_type: the type of change with a value from &enum * gpio_v2_line_changed_type * @padding: reserved for future use */ @@ -269,10 +270,10 @@ enum gpio_v2_line_event_id { /** * struct gpio_v2_line_event - The actual event being pushed to userspace * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds. - * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the - * accurate measurement of the time between events. It does not provide + * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow + * the accurate measurement of the time between events. It does not provide * the wall-clock time. - * @id: event identifier with value from enum gpio_v2_line_event_id + * @id: event identifier with value from &enum gpio_v2_line_event_id * @offset: the offset of the line that triggered the event * @seqno: the sequence number for this event in the sequence of events for * all the lines in this line request @@ -319,8 +320,8 @@ struct gpio_v2_line_event { * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info instead. */ struct gpioline_info { __u32 line_offset; @@ -344,18 +345,18 @@ enum { * of a GPIO line * @info: updated line information * @timestamp: estimate of time of status change occurrence, in nanoseconds - * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED - * and GPIOLINE_CHANGED_CONFIG + * @event_type: one of %GPIOLINE_CHANGED_REQUESTED, + * %GPIOLINE_CHANGED_RELEASED and %GPIOLINE_CHANGED_CONFIG * @padding: reserved for future use * - * Note: struct gpioline_info embedded here has 32-bit alignment on its own, + * The &struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can * guarantee there are no implicit holes between it and subsequent members. * The 20-byte padding at the end makes sure we don't add any implicit padding * at the end of the structure on 64-bit architectures. * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info_changed instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info_changed instead. */ struct gpioline_info_changed { struct gpioline_info info; @@ -379,13 +380,13 @@ struct gpioline_info_changed { * @lineoffsets: an array of desired lines, specified by offset index for the * associated GPIO device * @flags: desired flags for the desired GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together. Note that even if multiple lines are requested, the same flags * must be applicable to all of them, if you want lines with individual * flags set, request them one by one. It is possible to select * a batch of input or output lines, but they must all have the same * characteristics, i.e. all inputs or all outputs, all active low etc - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set for a requested + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set for a requested * line, this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @consumer_label: a desired consumer label for the selected GPIO line(s) @@ -393,11 +394,11 @@ struct gpioline_info_changed { * @lines: number of lines requested in this request, i.e. the number of * valid fields in the above arrays, set to 1 to request a single line * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. */ struct gpiohandle_request { __u32 lineoffsets[GPIOHANDLES_MAX]; @@ -411,15 +412,15 @@ struct gpiohandle_request { /** * struct gpiohandle_config - Configuration for a GPIO handle request * @flags: updated flags for the requested GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set in flags, + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set in flags, * this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @padding: reserved for future use and should be zero filled * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_config instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_config instead. */ struct gpiohandle_config { __u32 flags; @@ -433,8 +434,8 @@ struct gpiohandle_config { * state of a line, when setting the state of lines these should contain * the desired target state * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_values instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_values instead. */ struct gpiohandle_data { __u8 values[GPIOHANDLES_MAX]; @@ -450,17 +451,17 @@ struct gpiohandle_data { * @lineoffset: the desired line to subscribe to events from, specified by * offset index for the associated GPIO device * @handleflags: desired handle flags for the desired GPIO line, such as - * GPIOHANDLE_REQUEST_ACTIVE_LOW or GPIOHANDLE_REQUEST_OPEN_DRAIN + * %GPIOHANDLE_REQUEST_ACTIVE_LOW or %GPIOHANDLE_REQUEST_OPEN_DRAIN * @eventflags: desired flags for the desired GPIO event line, such as - * GPIOEVENT_REQUEST_RISING_EDGE or GPIOEVENT_REQUEST_FALLING_EDGE + * %GPIOEVENT_REQUEST_RISING_EDGE or %GPIOEVENT_REQUEST_FALLING_EDGE * @consumer_label: a desired consumer label for the selected GPIO line(s) * such as "my-listener" * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. */ struct gpioevent_request { __u32 lineoffset; @@ -481,8 +482,8 @@ struct gpioevent_request { * @timestamp: best estimate of time of event occurrence, in nanoseconds * @id: event identifier * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_event instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_event instead. */ struct gpioevent_data { __u64 timestamp; -- cgit From c303c51c87a61ace7330b5e0217468b1b8f98a75 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:28 +0800 Subject: gpio: uapi: remove whitespace Remove leading whitespace in ABI v1 comment. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-5-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 32dd18f238c3..ad3f56dd87ec 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -292,7 +292,7 @@ struct gpio_v2_line_event { }; /* - * ABI v1 + * ABI v1 * * This version of the ABI is deprecated. * Use the latest version of the ABI, defined above, instead. -- cgit From 2f84a2de539cc4301a332c2c76473fc25baf21b7 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 5 Oct 2020 15:03:29 +0800 Subject: gpio: uapi: clarify the meaning of 'empty' char arrays Clarify that a char array containing a string is considered 'empty' if the first character is the null terminator. The remaining characters are not relevant to this determination. Signed-off-by: Kent Gibson Reviewed-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201005070329.21055-6-warthog618@gmail.com Signed-off-by: Linus Walleij --- include/uapi/linux/gpio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index ad3f56dd87ec..2072c260f5d0 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -26,7 +26,7 @@ * struct gpiochip_info - Information about a certain GPIO chip * @name: the Linux kernel name of this GPIO chip * @label: a functional name for this GPIO chip, such as a product - * number, may be empty + * number, may be empty (i.e. label[0] == '\0') * @lines: number of GPIO lines on this chip */ struct gpiochip_info { @@ -203,7 +203,7 @@ struct gpio_v2_line_request { * struct gpio_v2_line_info - Information about a certain GPIO line * @name: the name of this GPIO line, such as the output pin of the line on * the chip, a rail or a pin header name on a board, as specified by the - * GPIO chip, may be empty + * GPIO chip, may be empty (i.e. name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set * by whatever is using it, will be empty if there is no current user but * may also be empty if the consumer doesn't set this up @@ -315,7 +315,7 @@ struct gpio_v2_line_event { * @flags: various flags for this line * @name: the name of this GPIO line, such as the output pin of the line on the * chip, a rail or a pin header name on a board, as specified by the gpio - * chip, may be empty + * chip, may be empty (i.e. name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set by * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up -- cgit From 6a6223ec7779dfdabb9c2567bb42079bc300cf27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:13 +0100 Subject: blk-mq: docs: add kernel-doc description for a new struct member As reported by kernel-doc: ./include/linux/blk-mq.h:267: warning: Function parameter or member 'active_queues_shared_sbitmap' not described in 'blk_mq_tag_set' There is now a new member for struct blk_mq_tag_set. Add a description for it, based on the commit that introduced it. Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jens Axboe Reviewed-by: John Garry Link: https://lore.kernel.org/r/8e513153b83eefc05e358f51f2632b592c3f6772.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..794b2a33a2c3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -235,6 +235,8 @@ enum hctx_type { * @flags: Zero or more BLK_MQ_F_* flags. * @driver_data: Pointer to data owned by the block driver that created this * tag set. + * @active_queues_shared_sbitmap: + * number of active request queues per tag set. * @__bitmap_tags: A shared tags sbitmap, used over all hctx's * @__breserved_tags: * A shared reserved tags sbitmap, used over all hctx's -- cgit From 89b422354409c275e898d26607201797cc05a932 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:17 +0100 Subject: mm: pagemap.h: fix two kernel-doc markups Changeset a8cf7f272b5a ("mm: add find_lock_head") renamed the index parameter, but forgot to update the kernel-doc markups accordingly. Fixes: a8cf7f272b5a ("mm: add find_lock_head") Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/dce89b296a4f5f9f8f798d5e76b6736c14a916ac.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/pagemap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c77b7c31b2e4..e1e19c1f9ec9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -344,9 +344,9 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search - * @offset: the page index + * @index: the page index * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * @@ -363,9 +363,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, /** * find_lock_head - Locate, pin and lock a pagecache page. * @mapping: The address_space to search. - * @offset: The page index. + * @index: The page index. * - * Looks up the page cache entry at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, its head page is returned locked and with an increased * refcount. * -- cgit From e86c6569c588a01f20e7554cc245f8fae831957b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:18 +0100 Subject: net: phy: remove kernel-doc duplication Sphinx 3 now checks for duplicated function declarations: .../Documentation/networking/kapi:143: ../include/linux/phy.h:163: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'unsigned int phy_supported_speeds (struct phy_device *phy, unsigned int *speeds, unsigned int size)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1034: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1076: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1088: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. .../Documentation/networking/kapi:143: ../include/linux/phy.h:1100: WARNING: Duplicate C declaration, also defined in 'networking/kapi'. Declaration is 'int __phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'. It turns that both the C and the H files have the same kernel-doc markup for the same functions. Let's drop the at the header file, keeping the one closer to the code. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/75e9a357f9a716833d2094b04898754876365e68.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/phy.h | 40 +++++----------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index eb3cb1a98b45..56563e5e0dc7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -147,16 +147,8 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; -/** +/* * phy_supported_speeds - return all speeds currently supported by a PHY device - * @phy: The PHY device to return supported speeds of. - * @speeds: buffer to store supported speeds in. - * @size: size of speeds buffer. - * - * Description: Returns the number of supported speeds, and fills - * the speeds buffer with the supported speeds. If speeds buffer is - * too small to contain all currently supported speeds, will return as - * many speeds as can fit. */ unsigned int phy_supported_speeds(struct phy_device *phy, unsigned int *speeds, @@ -1022,14 +1014,9 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum, regnum, mask, set); } -/** +/* * phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for phy_read(); */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); @@ -1064,38 +1051,21 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); __ret; \ }) -/** +/* * __phy_read_mmd - Convenience function for reading a register * from an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to read from - * @regnum: The register on the MMD to read - * - * Same rules as for __phy_read(); */ int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); -/** +/* * phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for phy_write(); */ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -/** +/* * __phy_write_mmd - Convenience function for writing a register * on an MMD on a given PHY. - * @phydev: The phy_device struct - * @devad: The MMD to write to - * @regnum: The register on the MMD to read - * @val: value to write to @regnum - * - * Same rules as for __phy_write(); */ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -- cgit From cf38cc9f1e71151f22584c40357afaab6609384b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:23 +0100 Subject: locking/refcount: move kernel-doc markups to the proper place Changeset a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") added a set of functions starting with __ that have a new parameter, adding a series of new warnings: $ ./scripts/kernel-doc -none include/linux/refcount.h include/linux/refcount.h:169: warning: Function parameter or member 'oldp' not described in '__refcount_add_not_zero' include/linux/refcount.h:208: warning: Function parameter or member 'oldp' not described in '__refcount_add' include/linux/refcount.h:239: warning: Function parameter or member 'oldp' not described in '__refcount_inc_not_zero' include/linux/refcount.h:261: warning: Function parameter or member 'oldp' not described in '__refcount_inc' include/linux/refcount.h:291: warning: Function parameter or member 'oldp' not described in '__refcount_sub_and_test' include/linux/refcount.h:327: warning: Function parameter or member 'oldp' not described in '__refcount_dec_and_test' include/linux/refcount.h:347: warning: Function parameter or member 'oldp' not described in '__refcount_dec' The issue is that the kernel-doc markups are now misplaced, as they should be added just before the functions. So, move the kernel-doc markups to the proper places, in order to drop the warnings. It should be noticed that git show produces a crappy output, for this patch without "--patience" flag. Fixes: a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7985c31d1ace591bc5e1faa05c367f1295b78afd.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/refcount.h | 130 +++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 7fabb1af18e0..497990c69b0b 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -147,24 +147,6 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -/** - * refcount_add_not_zero - add a value to a refcount unless it is 0 - * @i: the value to add to the refcount - * @r: the refcount - * - * Will saturate at REFCOUNT_SATURATED and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - * - * Use of this function is not recommended for the normal reference counting - * use case in which references are taken and released one at a time. In these - * cases, refcount_inc(), or one of its variants, should instead be used to - * increment a reference count. - * - * Return: false if the passed refcount is 0, true otherwise - */ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -183,17 +165,12 @@ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, in return old; } -static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) -{ - return __refcount_add_not_zero(i, r, NULL); -} - /** - * refcount_add - add a value to a refcount + * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * - * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * Will saturate at REFCOUNT_SATURATED and WARN. * * Provides no memory ordering, it is assumed the caller has guaranteed the * object memory to be stable (RCU, etc.). It does provide a control dependency @@ -203,7 +180,14 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) * use case in which references are taken and released one at a time. In these * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + return __refcount_add_not_zero(i, r, NULL); +} + static inline void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -217,11 +201,32 @@ static inline void __refcount_add(int i, refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ static inline void refcount_add(int i, refcount_t *r) { __refcount_add(i, r, NULL); } +static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero(1, r, oldp); +} + /** * refcount_inc_not_zero - increment a refcount unless it is 0 * @r: the refcount to increment @@ -235,14 +240,14 @@ static inline void refcount_add(int i, refcount_t *r) * * Return: true if the increment was successful, false otherwise */ -static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp) +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - return __refcount_add_not_zero(1, r, oldp); + return __refcount_inc_not_zero(r, NULL); } -static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +static inline void __refcount_inc(refcount_t *r, int *oldp) { - return __refcount_inc_not_zero(r, NULL); + __refcount_add(1, r, oldp); } /** @@ -257,14 +262,27 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ -static inline void __refcount_inc(refcount_t *r, int *oldp) +static inline void refcount_inc(refcount_t *r) { - __refcount_add(1, r, oldp); + __refcount_inc(r, NULL); } -static inline void refcount_inc(refcount_t *r) +static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { - __refcount_inc(r, NULL); + int old = atomic_fetch_sub_release(i, &r->refs); + + if (oldp) + *oldp = old; + + if (old == i) { + smp_acquire__after_ctrl_dep(); + return true; + } + + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); + + return false; } /** @@ -287,27 +305,14 @@ static inline void refcount_inc(refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - int old = atomic_fetch_sub_release(i, &r->refs); - - if (oldp) - *oldp = old; - - if (old == i) { - smp_acquire__after_ctrl_dep(); - return true; - } - - if (unlikely(old < 0 || old - i < 0)) - refcount_warn_saturate(r, REFCOUNT_SUB_UAF); - - return false; + return __refcount_sub_and_test(i, r, NULL); } -static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) { - return __refcount_sub_and_test(i, r, NULL); + return __refcount_sub_and_test(1, r, oldp); } /** @@ -323,26 +328,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) * * Return: true if the resulting refcount is 0, false otherwise */ -static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp) -{ - return __refcount_sub_and_test(1, r, oldp); -} - static inline __must_check bool refcount_dec_and_test(refcount_t *r) { return __refcount_dec_and_test(r, NULL); } -/** - * refcount_dec - decrement a refcount - * @r: the refcount - * - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at REFCOUNT_SATURATED. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. - */ static inline void __refcount_dec(refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(1, &r->refs); @@ -354,6 +344,16 @@ static inline void __refcount_dec(refcount_t *r, int *oldp) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ static inline void refcount_dec(refcount_t *r) { __refcount_dec(r, NULL); -- cgit From e029c5f2798720b463e8df0e184a4d1036311b43 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Mon, 26 Oct 2020 21:49:14 -0700 Subject: ext4: make num of fast commit blocks configurable This patch reserves a field in the jbd2 superblock for number of fast commit blocks. When this value is non-zero, Ext4 uses this field to set the number of fast commit blocks. Fixes: 6866d7b3f2bb ("ext4/jbd2: add fast commit initialization") Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201027044915.2553163-2-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fb3d71ad6eea..7e88bbc16ffb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -263,7 +263,10 @@ typedef struct journal_superblock_s /* 0x0050 */ __u8 s_checksum_type; /* checksum type */ __u8 s_padding2[3]; - __u32 s_padding[42]; +/* 0x0054 */ + __be32 s_num_fc_blks; /* Number of fast commit blocks */ +/* 0x0058 */ + __u32 s_padding[41]; __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ -- cgit From ea4b01d9b81f5f381fc6832bc31046878a2d1a5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Oct 2020 10:51:27 +0100 Subject: jbd2: fix a kernel-doc markup The kernel-doc markup that documents _fc_replay_callback is missing an asterisk, causing this warning: ../include/linux/jbd2.h:1271: warning: Function parameter or member 'j_fc_replay_callback' not described in 'journal_s' When building the docs. Fixes: 609f928af48f ("jbd2: fast commit recovery path") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6055927ada2015b55b413cdd2670533bdc9a8da2.1603791716.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 7e88bbc16ffb..1d5566af48ac 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1256,7 +1256,7 @@ struct journal_s */ void (*j_fc_cleanup_callback)(struct journal_s *journal, int); - /* + /** * @j_fc_replay_callback: * * File-system specific function that performs replay of a fast -- cgit From 80ade22c06ca115b81dd168e99479c8e09843513 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Tue, 27 Oct 2020 20:14:15 -0700 Subject: misc: mic: remove the MIC drivers This patch removes the MIC drivers from the kernel tree since the corresponding devices have been discontinued. Removing the dma and char-misc changes in one patch and merging via the char-misc tree is best to avoid any potential build breakage. Cc: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Acked-By: Vinod Koul Reviewed-by: Sherry Sun Link: https://lore.kernel.org/r/8c1443136563de34699d2c084df478181c205db4.1603854416.git.sudeep.dutt@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mic_bus.h | 100 --- include/linux/scif.h | 1339 --------------------------------------- include/uapi/linux/mic_common.h | 235 ------- include/uapi/linux/mic_ioctl.h | 77 --- 4 files changed, 1751 deletions(-) delete mode 100644 include/linux/mic_bus.h delete mode 100644 include/linux/scif.h delete mode 100644 include/uapi/linux/mic_common.h delete mode 100644 include/uapi/linux/mic_ioctl.h (limited to 'include') diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h deleted file mode 100644 index e99c789424e0..000000000000 --- a/include/linux/mic_bus.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2014 Intel Corporation. - * - * Intel MIC Bus driver. - * - * This implementation is very similar to the virtio bus driver - * implementation @ include/linux/virtio.h. - */ -#ifndef _MIC_BUS_H_ -#define _MIC_BUS_H_ -/* - * Everything a mbus driver needs to work with any particular mbus - * implementation. - */ -#include -#include - -struct mbus_device_id { - __u32 device; - __u32 vendor; -}; - -#define MBUS_DEV_DMA_HOST 2 -#define MBUS_DEV_DMA_MIC 3 -#define MBUS_DEV_ANY_ID 0xffffffff - -/** - * mbus_device - representation of a device using mbus - * @mmio_va: virtual address of mmio space - * @hw_ops: the hardware ops supported by this device. - * @id: the device type identification (used to match it with a driver). - * @dev: underlying device. - * be used to communicate with. - * @index: unique position on the mbus bus - */ -struct mbus_device { - void __iomem *mmio_va; - struct mbus_hw_ops *hw_ops; - struct mbus_device_id id; - struct device dev; - int index; -}; - -/** - * mbus_driver - operations for a mbus I/O driver - * @driver: underlying device driver (populate name and owner). - * @id_table: the ids serviced by this driver. - * @probe: the function to call when a device is found. Returns 0 or -errno. - * @remove: the function to call when a device is removed. - */ -struct mbus_driver { - struct device_driver driver; - const struct mbus_device_id *id_table; - int (*probe)(struct mbus_device *dev); - void (*scan)(struct mbus_device *dev); - void (*remove)(struct mbus_device *dev); -}; - -/** - * struct mic_irq - opaque pointer used as cookie - */ -struct mic_irq; - -/** - * mbus_hw_ops - Hardware operations for accessing a MIC device on the MIC bus. - */ -struct mbus_hw_ops { - struct mic_irq* (*request_threaded_irq)(struct mbus_device *mbdev, - irq_handler_t handler, - irq_handler_t thread_fn, - const char *name, void *data, - int intr_src); - void (*free_irq)(struct mbus_device *mbdev, - struct mic_irq *cookie, void *data); - void (*ack_interrupt)(struct mbus_device *mbdev, int num); -}; - -struct mbus_device * -mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, int index, - void __iomem *mmio_va); -void mbus_unregister_device(struct mbus_device *mbdev); - -int mbus_register_driver(struct mbus_driver *drv); -void mbus_unregister_driver(struct mbus_driver *drv); - -static inline struct mbus_device *dev_to_mbus(struct device *_dev) -{ - return container_of(_dev, struct mbus_device, dev); -} - -static inline struct mbus_driver *drv_to_mbus(struct device_driver *drv) -{ - return container_of(drv, struct mbus_driver, driver); -} - -#endif /* _MIC_BUS_H */ diff --git a/include/linux/scif.h b/include/linux/scif.h deleted file mode 100644 index 329e695b8fe5..000000000000 --- a/include/linux/scif.h +++ /dev/null @@ -1,1339 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel SCIF driver. - * - */ -#ifndef __SCIF_H__ -#define __SCIF_H__ - -#include -#include -#include -#include - -#define SCIF_ACCEPT_SYNC 1 -#define SCIF_SEND_BLOCK 1 -#define SCIF_RECV_BLOCK 1 - -enum { - SCIF_PROT_READ = (1 << 0), - SCIF_PROT_WRITE = (1 << 1) -}; - -enum { - SCIF_MAP_FIXED = 0x10, - SCIF_MAP_KERNEL = 0x20, -}; - -enum { - SCIF_FENCE_INIT_SELF = (1 << 0), - SCIF_FENCE_INIT_PEER = (1 << 1), - SCIF_SIGNAL_LOCAL = (1 << 4), - SCIF_SIGNAL_REMOTE = (1 << 5) -}; - -enum { - SCIF_RMA_USECPU = (1 << 0), - SCIF_RMA_USECACHE = (1 << 1), - SCIF_RMA_SYNC = (1 << 2), - SCIF_RMA_ORDERED = (1 << 3) -}; - -/* End of SCIF Admin Reserved Ports */ -#define SCIF_ADMIN_PORT_END 1024 - -/* End of SCIF Reserved Ports */ -#define SCIF_PORT_RSVD 1088 - -typedef struct scif_endpt *scif_epd_t; -typedef struct scif_pinned_pages *scif_pinned_pages_t; - -/** - * struct scif_range - SCIF registered range used in kernel mode - * @cookie: cookie used internally by SCIF - * @nr_pages: number of pages of PAGE_SIZE - * @prot_flags: R/W protection - * @phys_addr: Array of bus addresses - * @va: Array of kernel virtual addresses backed by the pages in the phys_addr - * array. The va is populated only when called on the host for a remote - * SCIF connection on MIC. This is required to support the use case of DMA - * between MIC and another device which is not a SCIF node e.g., an IB or - * ethernet NIC. - */ -struct scif_range { - void *cookie; - int nr_pages; - int prot_flags; - dma_addr_t *phys_addr; - void __iomem **va; -}; - -/** - * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll - * @epd: SCIF endpoint - * @events: requested events - * @revents: returned events - */ -struct scif_pollepd { - scif_epd_t epd; - __poll_t events; - __poll_t revents; -}; - -/** - * scif_peer_dev - representation of a peer SCIF device - * - * Peer devices show up as PCIe devices for the mgmt node but not the cards. - * The mgmt node discovers all the cards on the PCIe bus and informs the other - * cards about their peers. Upon notification of a peer a node adds a peer - * device to the peer bus to maintain symmetry in the way devices are - * discovered across all nodes in the SCIF network. - * - * @dev: underlying device - * @dnode - The destination node which this device will communicate with. - */ -struct scif_peer_dev { - struct device dev; - u8 dnode; -}; - -/** - * scif_client - representation of a SCIF client - * @name: client name - * @probe - client method called when a peer device is registered - * @remove - client method called when a peer device is unregistered - * @si - subsys_interface used internally for implementing SCIF clients - */ -struct scif_client { - const char *name; - void (*probe)(struct scif_peer_dev *spdev); - void (*remove)(struct scif_peer_dev *spdev); - struct subsys_interface si; -}; - -#define SCIF_OPEN_FAILED ((scif_epd_t)-1) -#define SCIF_REGISTER_FAILED ((off_t)-1) -#define SCIF_MMAP_FAILED ((void *)-1) - -/** - * scif_open() - Create an endpoint - * - * Return: - * Upon successful completion, scif_open() returns an endpoint descriptor to - * be used in subsequent SCIF functions calls to refer to that endpoint; - * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is - * returned and errno is set to indicate the error; in kernel mode a NULL - * scif_epd_t is returned. - * - * Errors: - * ENOMEM - Insufficient kernel memory was available - */ -scif_epd_t scif_open(void); - -/** - * scif_bind() - Bind an endpoint to a port - * @epd: endpoint descriptor - * @pn: port number - * - * scif_bind() binds endpoint epd to port pn, where pn is a port number on the - * local node. If pn is zero, a port number greater than or equal to - * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to - * exactly one local port. Ports less than 1024 when requested can only be bound - * by system (or root) processes or by processes executed by privileged users. - * - * Return: - * Upon successful completion, scif_bind() returns the port number to which epd - * is bound; otherwise in user mode -1 is returned and errno is set to - * indicate the error; in kernel mode the negative of one of the following - * errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINVAL - the endpoint or the port is already bound - * EISCONN - The endpoint is already connected - * ENOSPC - No port number available for assignment - * EACCES - The port requested is protected and the user is not the superuser - */ -int scif_bind(scif_epd_t epd, u16 pn); - -/** - * scif_listen() - Listen for connections on an endpoint - * @epd: endpoint descriptor - * @backlog: maximum pending connection requests - * - * scif_listen() marks the endpoint epd as a listening endpoint - that is, as - * an endpoint that will be used to accept incoming connection requests. Once - * so marked, the endpoint is said to be in the listening state and may not be - * used as the endpoint of a connection. - * - * The endpoint, epd, must have been bound to a port. - * - * The backlog argument defines the maximum length to which the queue of - * pending connections for epd may grow. If a connection request arrives when - * the queue is full, the client may receive an error with an indication that - * the connection was refused. - * - * Return: - * Upon successful completion, scif_listen() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINVAL - the endpoint is not bound to a port - * EISCONN - The endpoint is already connected or listening - */ -int scif_listen(scif_epd_t epd, int backlog); - -/** - * scif_connect() - Initiate a connection on a port - * @epd: endpoint descriptor - * @dst: global id of port to which to connect - * - * The scif_connect() function requests the connection of endpoint epd to remote - * port dst. If the connection is successful, a peer endpoint, bound to dst, is - * created on node dst.node. On successful return, the connection is complete. - * - * If the endpoint epd has not already been bound to a port, scif_connect() - * will bind it to an unused local port. - * - * A connection is terminated when an endpoint of the connection is closed, - * either explicitly by scif_close(), or when a process that owns one of the - * endpoints of the connection is terminated. - * - * In user space, scif_connect() supports an asynchronous connection mode - * if the application has set the O_NONBLOCK flag on the endpoint via the - * fcntl() system call. Setting this flag will result in the calling process - * not to wait during scif_connect(). - * - * Return: - * Upon successful completion, scif_connect() returns the port ID to which the - * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is - * set to indicate the error; in kernel mode the negative of one of the - * following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNREFUSED - The destination was not listening for connections or refused - * the connection request - * EINVAL - dst.port is not a valid port ID - * EISCONN - The endpoint is already connected - * ENOMEM - No buffer space is available - * ENODEV - The destination node does not exist, or the node is lost or existed, - * but is not currently in the network since it may have crashed - * ENOSPC - No port number available for assignment - * EOPNOTSUPP - The endpoint is listening and cannot be connected - */ -int scif_connect(scif_epd_t epd, struct scif_port_id *dst); - -/** - * scif_accept() - Accept a connection on an endpoint - * @epd: endpoint descriptor - * @peer: global id of port to which connected - * @newepd: new connected endpoint descriptor - * @flags: flags - * - * The scif_accept() call extracts the first connection request from the queue - * of pending connections for the port on which epd is listening. scif_accept() - * creates a new endpoint, bound to the same port as epd, and allocates a new - * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new - * endpoint is connected to the endpoint through which the connection was - * requested. epd is unaffected by this call, and remains in the listening - * state. - * - * On successful return, peer holds the global port identifier (node id and - * local port number) of the port which requested the connection. - * - * A connection is terminated when an endpoint of the connection is closed, - * either explicitly by scif_close(), or when a process that owns one of the - * endpoints of the connection is terminated. - * - * The number of connections that can (subsequently) be accepted on epd is only - * limited by system resources (memory). - * - * The flags argument is formed by OR'ing together zero or more of the - * following values. - * SCIF_ACCEPT_SYNC - block until a connection request is presented. If - * SCIF_ACCEPT_SYNC is not in flags, and no pending - * connections are present on the queue, scif_accept() - * fails with an EAGAIN error - * - * In user mode, the select() and poll() functions can be used to determine - * when there is a connection request. In kernel mode, the scif_poll() - * function may be used for this purpose. A readable event will be delivered - * when a connection is requested. - * - * Return: - * Upon successful completion, scif_accept() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be - * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete - * its connection request - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * EINTR - Interrupted function - * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is - * NULL, or newepd is NULL - * ENODEV - The requesting node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOENT - Secondary part of epd registration failed - */ -int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t - *newepd, int flags); - -/** - * scif_close() - Close an endpoint - * @epd: endpoint descriptor - * - * scif_close() closes an endpoint and performs necessary teardown of - * facilities associated with that endpoint. - * - * If epd is a listening endpoint then it will no longer accept connection - * requests on the port to which it is bound. Any pending connection requests - * are rejected. - * - * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs - * which are in-process through epd or its peer endpoint will complete before - * scif_close() returns. Registered windows of the local and peer endpoints are - * released as if scif_unregister() was called against each window. - * - * Closing a SCIF endpoint does not affect local registered memory mapped by - * a SCIF endpoint on a remote node. The local memory remains mapped by the peer - * SCIF endpoint explicitly removed by calling munmap(..) by the peer. - * - * If the peer endpoint's receive queue is not empty at the time that epd is - * closed, then the peer endpoint can be passed as the endpoint parameter to - * scif_recv() until the receive queue is empty. - * - * epd is freed and may no longer be accessed. - * - * Return: - * Upon successful completion, scif_close() returns 0; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode the - * negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - */ -int scif_close(scif_epd_t epd); - -/** - * scif_send() - Send a message - * @epd: endpoint descriptor - * @msg: message buffer address - * @len: message length - * @flags: blocking mode flags - * - * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data - * are copied from memory starting at address msg. On successful execution the - * return value of scif_send() is the number of bytes that were sent, and is - * zero if no bytes were sent because len was zero. scif_send() may be called - * only when the endpoint is in a connected state. - * - * If a scif_send() call is non-blocking, then it sends only those bytes which - * can be sent without waiting, up to a maximum of len bytes. - * - * If a scif_send() call is blocking, then it normally returns after sending - * all len bytes. If a blocking call is interrupted or the connection is - * reset, the call is considered successful if some bytes were sent or len is - * zero, otherwise the call is considered unsuccessful. - * - * In user mode, the select() and poll() functions can be used to determine - * when the send queue is not full. In kernel mode, the scif_poll() function - * may be used for this purpose. - * - * It is recommended that scif_send()/scif_recv() only be used for short - * control-type message communication between SCIF endpoints. The SCIF RMA - * APIs are expected to provide better performance for transfer sizes of - * 1024 bytes or longer for the current MIC hardware and software - * implementation. - * - * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK - * is passed as the flags argument. - * - * Return: - * Upon successful completion, scif_send() returns the number of bytes sent; - * otherwise in user mode -1 is returned and errno is set to indicate the - * error; in kernel mode the negative of one of the following errors is - * returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or len is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -int scif_send(scif_epd_t epd, void *msg, int len, int flags); - -/** - * scif_recv() - Receive a message - * @epd: endpoint descriptor - * @msg: message buffer address - * @len: message buffer length - * @flags: blocking mode flags - * - * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of - * data are copied to memory starting at address msg. On successful execution - * the return value of scif_recv() is the number of bytes that were received, - * and is zero if no bytes were received because len was zero. scif_recv() may - * be called only when the endpoint is in a connected state. - * - * If a scif_recv() call is non-blocking, then it receives only those bytes - * which can be received without waiting, up to a maximum of len bytes. - * - * If a scif_recv() call is blocking, then it normally returns after receiving - * all len bytes. If the blocking call was interrupted due to a disconnection, - * subsequent calls to scif_recv() will copy all bytes received upto the point - * of disconnection. - * - * In user mode, the select() and poll() functions can be used to determine - * when data is available to be received. In kernel mode, the scif_poll() - * function may be used for this purpose. - * - * It is recommended that scif_send()/scif_recv() only be used for short - * control-type message communication between SCIF endpoints. The SCIF RMA - * APIs are expected to provide better performance for transfer sizes of - * 1024 bytes or longer for the current MIC hardware and software - * implementation. - * - * scif_recv() will block until the entire message is received if - * SCIF_RECV_BLOCK is passed as the flags argument. - * - * Return: - * Upon successful completion, scif_recv() returns the number of bytes - * received; otherwise in user mode -1 is returned and errno is set to - * indicate the error; in kernel mode the negative of one of the following - * errors is returned. - * - * Errors: - * EAGAIN - The destination node is returning from a low power state - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or len is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -int scif_recv(scif_epd_t epd, void *msg, int len, int flags); - -/** - * scif_register() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @addr: starting virtual address - * @len: length of range - * @offset: offset of window - * @prot_flags: read/write protection flags - * @map_flags: mapping flags - * - * The scif_register() function opens a window, a range of whole pages of the - * registered address space of the endpoint epd, starting at offset po and - * continuing for len bytes. The value of po, further described below, is a - * function of the parameters offset and len, and the value of map_flags. Each - * page of the window represents the physical memory page which backs the - * corresponding page of the range of virtual address pages starting at addr - * and continuing for len bytes. addr and len are constrained to be multiples - * of the page size. A successful scif_register() call returns po. - * - * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset - * exactly, and offset is constrained to be a multiple of the page size. The - * mapping established by scif_register() will not replace any existing - * registration; an error is returned if any page within the range [offset, - * offset + len - 1] intersects an existing window. - * - * When SCIF_MAP_FIXED is not set, the implementation uses offset in an - * implementation-defined manner to arrive at po. The po value so chosen will - * be an area of the registered address space that the implementation deems - * suitable for a mapping of len bytes. An offset value of 0 is interpreted as - * granting the implementation complete freedom in selecting po, subject to - * constraints described below. A non-zero value of offset is taken to be a - * suggestion of an offset near which the mapping should be placed. When the - * implementation selects a value for po, it does not replace any extant - * window. In all cases, po will be a multiple of the page size. - * - * The physical pages which are so represented by a window are available for - * access in calls to mmap(), scif_readfrom(), scif_writeto(), - * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the - * physical pages represented by the window will not be reused by the memory - * subsystem for any other purpose. Note that the same physical page may be - * represented by multiple windows. - * - * Subsequent operations which change the memory pages to which virtual - * addresses are mapped (such as mmap(), munmap()) have no effect on - * existing window. - * - * If the process will fork(), it is recommended that the registered - * virtual address range be marked with MADV_DONTFORK. Doing so will prevent - * problems due to copy-on-write semantics. - * - * The prot_flags argument is formed by OR'ing together one or more of the - * following values. - * SCIF_PROT_READ - allow read operations from the window - * SCIF_PROT_WRITE - allow write operations to the window - * - * Return: - * Upon successful completion, scif_register() returns the offset at which the - * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that - * is (off_t *)-1) is returned and errno is set to indicate the error; in - * kernel mode the negative of one of the following errors is returned. - * - * Errors: - * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range - * [offset, offset + len -1] are already registered - * EAGAIN - The mapping could not be performed due to lack of resources - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is - * set in flags, and offset is not a multiple of the page size, or addr is not a - * multiple of the page size, or len is not a multiple of the page size, or is - * 0, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN -The endpoint is not connected - */ -off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, - int prot_flags, int map_flags); - -/** - * scif_unregister() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @offset: start of range to unregister - * @len: length of range to unregister - * - * The scif_unregister() function closes those previously registered windows - * which are entirely within the range [offset, offset + len - 1]. It is an - * error to specify a range which intersects only a subrange of a window. - * - * On a successful return, pages within the window may no longer be specified - * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), - * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, - * however, continues to exist until all previous references against it are - * removed. A window is referenced if there is a mapping to it created by - * mmap(), or if scif_get_pages() was called against the window - * (and the pages have not been returned via scif_put_pages()). A window is - * also referenced while an RMA, in which some range of the window is a source - * or destination, is in progress. Finally a window is referenced while some - * offset in that window was specified to scif_fence_signal(), and the RMAs - * marked by that call to scif_fence_signal() have not completed. While a - * window is in this state, its registered address space pages are not - * available for use in a new registered window. - * - * When all such references to the window have been removed, its references to - * all the physical pages which it represents are removed. Similarly, the - * registered address space pages of the window become available for - * registration in a new window. - * - * Return: - * Upon successful completion, scif_unregister() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. In the event of an - * error, no windows are unregistered. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a - * window, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_unregister(scif_epd_t epd, off_t offset, size_t len); - -/** - * scif_readfrom() - Copy from a remote address space - * @epd: endpoint descriptor - * @loffset: offset in local registered address space to - * which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space - * from which to copy - * @rma_flags: transfer mode flags - * - * scif_readfrom() copies len bytes from the remote registered address space of - * the peer of endpoint epd, starting at the offset roffset to the local - * registered address space of epd, starting at the offset loffset. - * - * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, - * roffset + len - 1] must be within some registered window or windows of the - * local and remote nodes. A range may intersect multiple registered windows, - * but only if those windows are contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * The optimal DMA performance will likely be realized if both - * loffset and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if loffset and roffset are not - * cacheline aligned but are separated by some multiple of 64. The lowest level - * of performance is likely if loffset and roffset are not separated by a - * multiple of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_readfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered - * address space of epd, or, The range [roffset, roffset + len - 1] is invalid - * for the registered address space of the peer of epd, or loffset or roffset - * is negative - */ -int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t - roffset, int rma_flags); - -/** - * scif_writeto() - Copy to a remote address space - * @epd: endpoint descriptor - * @loffset: offset in local registered address space - * from which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space to - * which to copy - * @rma_flags: transfer mode flags - * - * scif_writeto() copies len bytes from the local registered address space of - * epd, starting at the offset loffset to the remote registered address space - * of the peer of endpoint epd, starting at the offset roffset. - * - * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, - * roffset + len - 1] must be within some registered window or windows of the - * local and remote nodes. A range may intersect multiple registered windows, - * but only if those windows are contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * The optimal DMA performance will likely be realized if both - * loffset and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if loffset and roffset are not cacheline - * aligned but are separated by some multiple of 64. The lowest level of - * performance is likely if loffset and roffset are not separated by a multiple - * of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_readfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered - * address space of epd, or, The range [roffset , roffset + len -1] is invalid - * for the registered address space of the peer of epd, or loffset or roffset - * is negative - */ -int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t - roffset, int rma_flags); - -/** - * scif_vreadfrom() - Copy from a remote address space - * @epd: endpoint descriptor - * @addr: address to which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space - * from which to copy - * @rma_flags: transfer mode flags - * - * scif_vreadfrom() copies len bytes from the remote registered address - * space of the peer of endpoint epd, starting at the offset roffset, to local - * memory, starting at addr. - * - * The specified range [roffset, roffset + len - 1] must be within some - * registered window or windows of the remote nodes. The range may - * intersect multiple registered windows, but only if those windows are - * contiguous in the registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back - * the specified local memory range may be remain in a pinned state even after - * the specified transfer completes. This may reduce overhead if some or all of - * the same virtual address range is referenced in a subsequent call of - * scif_vreadfrom() or scif_vwriteto(). - * - * The optimal DMA performance will likely be realized if both - * addr and roffset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if addr and roffset are not - * cacheline aligned but are separated by some multiple of 64. The lowest level - * of performance is likely if addr and roffset are not separated by a - * multiple of 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_USECACHE - enable registration caching - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, - int rma_flags); - -/** - * scif_vwriteto() - Copy to a remote address space - * @epd: endpoint descriptor - * @addr: address from which to copy - * @len: length of range to copy - * @roffset: offset in remote registered address space to - * which to copy - * @rma_flags: transfer mode flags - * - * scif_vwriteto() copies len bytes from the local memory, starting at addr, to - * the remote registered address space of the peer of endpoint epd, starting at - * the offset roffset. - * - * The specified range [roffset, roffset + len - 1] must be within some - * registered window or windows of the remote nodes. The range may intersect - * multiple registered windows, but only if those windows are contiguous in the - * registered address space. - * - * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using - * programmed read/writes. Otherwise the data is copied using DMA. If rma_- - * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the - * transfer is complete. Otherwise, the transfer may be performed asynchron- - * ously. The order in which any two asynchronous RMA operations complete - * is non-deterministic. The synchronization functions, scif_fence_mark()/ - * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to - * the completion of asynchronous RMA operations on the same endpoint. - * - * The DMA transfer of individual bytes is not guaranteed to complete in - * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last - * cacheline or partial cacheline of the source range will become visible on - * the destination node after all other transferred data in the source - * range has become visible on the destination node. - * - * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back - * the specified local memory range may be remain in a pinned state even after - * the specified transfer completes. This may reduce overhead if some or all of - * the same virtual address range is referenced in a subsequent call of - * scif_vreadfrom() or scif_vwriteto(). - * - * The optimal DMA performance will likely be realized if both - * addr and offset are cacheline aligned (are a multiple of 64). Lower - * performance will likely be realized if addr and offset are not cacheline - * aligned but are separated by some multiple of 64. The lowest level of - * performance is likely if addr and offset are not separated by a multiple of - * 64. - * - * The rma_flags argument is formed by ORing together zero or more of the - * following values. - * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA - * engine. - * SCIF_RMA_USECACHE - allow registration caching - * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the - * transfer has completed. Passing this flag results in the - * current implementation busy waiting and consuming CPU cycles - * while the DMA transfer is in progress for best performance by - * avoiding the interrupt latency. - * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of - * the source range becomes visible on the destination node - * after all other transferred data in the source range has - * become visible on the destination - * - * Return: - * Upon successful completion, scif_vwriteto() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EACCES - Attempt to write to a read-only range - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - rma_flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the - * registered address space of epd - */ -int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, - int rma_flags); - -/** - * scif_fence_mark() - Mark previously issued RMAs - * @epd: endpoint descriptor - * @flags: control flags - * @mark: marked value returned as output. - * - * scif_fence_mark() returns after marking the current set of all uncompleted - * RMAs initiated through the endpoint epd or the current set of all - * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are - * marked with a value returned at mark. The application may subsequently call - * scif_fence_wait(), passing the value returned at mark, to await completion - * of all RMAs so marked. - * - * The flags argument has exactly one of the following values. - * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint - * epd are marked - * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer - * of endpoint epd are marked - * - * Return: - * Upon successful completion, scif_fence_mark() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENOMEM - Insufficient kernel memory was available - */ -int scif_fence_mark(scif_epd_t epd, int flags, int *mark); - -/** - * scif_fence_wait() - Wait for completion of marked RMAs - * @epd: endpoint descriptor - * @mark: mark request - * - * scif_fence_wait() returns after all RMAs marked with mark have completed. - * The value passed in mark must have been obtained in a previous call to - * scif_fence_mark(). - * - * Return: - * Upon successful completion, scif_fence_wait() returns 0; otherwise in user - * mode -1 is returned and errno is set to indicate the error; in kernel mode - * the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENOMEM - Insufficient kernel memory was available - */ -int scif_fence_wait(scif_epd_t epd, int mark); - -/** - * scif_fence_signal() - Request a memory update on completion of RMAs - * @epd: endpoint descriptor - * @loff: local offset - * @lval: local value to write to loffset - * @roff: remote offset - * @rval: remote value to write to roffset - * @flags: flags - * - * scif_fence_signal() returns after marking the current set of all uncompleted - * RMAs initiated through the endpoint epd or marking the current set of all - * uncompleted RMAs initiated through the peer of endpoint epd. - * - * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the - * marked set, lval is written to memory at the address corresponding to offset - * loff in the local registered address space of epd. loff must be within a - * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion - * of the RMAs in the marked set, rval is written to memory at the address - * corresponding to offset roff in the remote registered address space of epd. - * roff must be within a remote registered window of the peer of epd. Note - * that any specified offset must be DWORD (4 byte / 32 bit) aligned. - * - * The flags argument is formed by OR'ing together the following. - * Exactly one of the following values. - * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint - * epd are marked - * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer - * of endpoint epd are marked - * One or more of the following values. - * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to - * memory at the address corresponding to offset loff in the local - * registered address space of epd. - * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to - * memory at the address corresponding to offset roff in the remote - * registered address space of epd. - * - * Return: - * Upon successful completion, scif_fence_signal() returns 0; otherwise in - * user mode -1 is returned and errno is set to indicate the error; in kernel - * mode the negative of one of the following errors is returned. - * - * Errors: - * EBADF, ENOTTY - epd is not a valid endpoint descriptor - * ECONNRESET - Connection reset by peer - * EINVAL - flags is invalid, or loff or roff are not DWORD aligned - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - loff is invalid for the registered address of epd, or roff is invalid - * for the registered address space, of the peer of epd - */ -int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, - u64 rval, int flags); - -/** - * scif_get_node_ids() - Return information about online nodes - * @nodes: array in which to return online node IDs - * @len: number of entries in the nodes array - * @self: address to place the node ID of the local node - * - * scif_get_node_ids() fills in the nodes array with up to len node IDs of the - * nodes in the SCIF network. If there is not enough space in nodes, as - * indicated by the len parameter, only len node IDs are returned in nodes. The - * return value of scif_get_node_ids() is the total number of nodes currently in - * the SCIF network. By checking the return value against the len parameter, - * the user may determine if enough space for nodes was allocated. - * - * The node ID of the local node is returned at self. - * - * Return: - * Upon successful completion, scif_get_node_ids() returns the actual number of - * online nodes in the SCIF network including 'self'; otherwise in user mode - * -1 is returned and errno is set to indicate the error; in kernel mode no - * errors are returned. - */ -int scif_get_node_ids(u16 *nodes, int len, u16 *self); - -/** - * scif_pin_pages() - Pin a set of pages - * @addr: Virtual address of range to pin - * @len: Length of range to pin - * @prot_flags: Page protection flags - * @map_flags: Page classification flags - * @pinned_pages: Handle to pinned pages - * - * scif_pin_pages() pins (locks in physical memory) the physical pages which - * back the range of virtual address pages starting at addr and continuing for - * len bytes. addr and len are constrained to be multiples of the page size. A - * successful scif_pin_pages() call returns a handle to pinned_pages which may - * be used in subsequent calls to scif_register_pinned_pages(). - * - * The pages will remain pinned as long as there is a reference against the - * scif_pinned_pages_t value returned by scif_pin_pages() and until - * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A - * reference is added to a scif_pinned_pages_t value each time a window is - * created by calling scif_register_pinned_pages() and passing the - * scif_pinned_pages_t value. A reference is removed from a - * scif_pinned_pages_t value each time such a window is deleted. - * - * Subsequent operations which change the memory pages to which virtual - * addresses are mapped (such as mmap(), munmap()) have no effect on the - * scif_pinned_pages_t value or windows created against it. - * - * If the process will fork(), it is recommended that the registered - * virtual address range be marked with MADV_DONTFORK. Doing so will prevent - * problems due to copy-on-write semantics. - * - * The prot_flags argument is formed by OR'ing together one or more of the - * following values. - * SCIF_PROT_READ - allow read operations against the pages - * SCIF_PROT_WRITE - allow write operations against the pages - * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a - * kernel space address. By default, addr is interpreted as a user space - * address. - * - * Return: - * Upon successful completion, scif_pin_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * - * Errors: - * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative - * ENOMEM - Not enough space - */ -int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, - scif_pinned_pages_t *pinned_pages); - -/** - * scif_unpin_pages() - Unpin a set of pages - * @pinned_pages: Handle to pinned pages to be unpinned - * - * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new - * windows against pinned_pages. The physical pages represented by pinned_pages - * will remain pinned until all windows previously registered against - * pinned_pages are deleted (the window is scif_unregister()'d and all - * references to the window are removed (see scif_unregister()). - * - * pinned_pages must have been obtain from a previous call to scif_pin_pages(). - * After calling scif_unpin_pages(), it is an error to pass pinned_pages to - * scif_register_pinned_pages(). - * - * Return: - * Upon successful completion, scif_unpin_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * - * Errors: - * EINVAL - pinned_pages is not valid - */ -int scif_unpin_pages(scif_pinned_pages_t pinned_pages); - -/** - * scif_register_pinned_pages() - Mark a memory region for remote access. - * @epd: endpoint descriptor - * @pinned_pages: Handle to pinned pages - * @offset: Registered address space offset - * @map_flags: Flags which control where pages are mapped - * - * The scif_register_pinned_pages() function opens a window, a range of whole - * pages of the registered address space of the endpoint epd, starting at - * offset po. The value of po, further described below, is a function of the - * parameters offset and pinned_pages, and the value of map_flags. Each page of - * the window represents a corresponding physical memory page of the range - * represented by pinned_pages; the length of the window is the same as the - * length of range represented by pinned_pages. A successful - * scif_register_pinned_pages() call returns po as the return value. - * - * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset - * exactly, and offset is constrained to be a multiple of the page size. The - * mapping established by scif_register_pinned_pages() will not replace any - * existing registration; an error is returned if any page of the new window - * would intersect an existing window. - * - * When SCIF_MAP_FIXED is not set, the implementation uses offset in an - * implementation-defined manner to arrive at po. The po so chosen will be an - * area of the registered address space that the implementation deems suitable - * for a mapping of the required size. An offset value of 0 is interpreted as - * granting the implementation complete freedom in selecting po, subject to - * constraints described below. A non-zero value of offset is taken to be a - * suggestion of an offset near which the mapping should be placed. When the - * implementation selects a value for po, it does not replace any extant - * window. In all cases, po will be a multiple of the page size. - * - * The physical pages which are so represented by a window are available for - * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), - * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the - * physical pages represented by the window will not be reused by the memory - * subsystem for any other purpose. Note that the same physical page may be - * represented by multiple windows. - * - * Windows created by scif_register_pinned_pages() are unregistered by - * scif_unregister(). - * - * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a - * fixed offset. - * - * Return: - * Upon successful completion, scif_register_pinned_pages() returns the offset - * at which the mapping was placed (po); otherwise the negative of one of the - * following errors is returned. - * - * Errors: - * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window - * would intersect an existing window - * EAGAIN - The mapping could not be performed due to lack of resources - * ECONNRESET - Connection reset by peer - * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and - * offset is not a multiple of the page size, or offset is negative - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOMEM - Not enough space - * ENOTCONN - The endpoint is not connected - */ -off_t scif_register_pinned_pages(scif_epd_t epd, - scif_pinned_pages_t pinned_pages, - off_t offset, int map_flags); - -/** - * scif_get_pages() - Add references to remote registered pages - * @epd: endpoint descriptor - * @offset: remote registered offset - * @len: length of range of pages - * @pages: returned scif_range structure - * - * scif_get_pages() returns the addresses of the physical pages represented by - * those pages of the registered address space of the peer of epd, starting at - * offset and continuing for len bytes. offset and len are constrained to be - * multiples of the page size. - * - * All of the pages in the specified range [offset, offset + len - 1] must be - * within a single window of the registered address space of the peer of epd. - * - * The addresses are returned as a virtually contiguous array pointed to by the - * phys_addr component of the scif_range structure whose address is returned in - * pages. The nr_pages component of scif_range is the length of the array. The - * prot_flags component of scif_range holds the protection flag value passed - * when the pages were registered. - * - * Each physical page whose address is returned by scif_get_pages() remains - * available and will not be released for reuse until the scif_range structure - * is returned in a call to scif_put_pages(). The scif_range structure returned - * by scif_get_pages() must be unmodified. - * - * It is an error to call scif_close() on an endpoint on which a scif_range - * structure of that endpoint has not been returned to scif_put_pages(). - * - * Return: - * Upon successful completion, scif_get_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * Errors: - * ECONNRESET - Connection reset by peer. - * EINVAL - offset is not a multiple of the page size, or offset is negative, or - * len is not a multiple of the page size - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid - * for the registered address space of the peer epd - */ -int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, - struct scif_range **pages); - -/** - * scif_put_pages() - Remove references from remote registered pages - * @pages: pages to be returned - * - * scif_put_pages() releases a scif_range structure previously obtained by - * calling scif_get_pages(). The physical pages represented by pages may - * be reused when the window which represented those pages is unregistered. - * Therefore, those pages must not be accessed after calling scif_put_pages(). - * - * Return: - * Upon successful completion, scif_put_pages() returns 0; otherwise the - * negative of one of the following errors is returned. - * Errors: - * EINVAL - pages does not point to a valid scif_range structure, or - * the scif_range structure pointed to by pages was already returned - * ENODEV - The remote node is lost or existed, but is not currently in the - * network since it may have crashed - * ENOTCONN - The endpoint is not connected - */ -int scif_put_pages(struct scif_range *pages); - -/** - * scif_poll() - Wait for some event on an endpoint - * @epds: Array of endpoint descriptors - * @nepds: Length of epds - * @timeout: Upper limit on time for which scif_poll() will block - * - * scif_poll() waits for one of a set of endpoints to become ready to perform - * an I/O operation. - * - * The epds argument specifies the endpoint descriptors to be examined and the - * events of interest for each endpoint descriptor. epds is a pointer to an - * array with one member for each open endpoint descriptor of interest. - * - * The number of items in the epds array is specified in nepds. The epd field - * of scif_pollepd is an endpoint descriptor of an open endpoint. The field - * events is a bitmask specifying the events which the application is - * interested in. The field revents is an output parameter, filled by the - * kernel with the events that actually occurred. The bits returned in revents - * can include any of those specified in events, or one of the values EPOLLERR, - * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events - * field, and will be set in the revents field whenever the corresponding - * condition is true.) - * - * If none of the events requested (and no error) has occurred for any of the - * endpoint descriptors, then scif_poll() blocks until one of the events occurs. - * - * The timeout argument specifies an upper limit on the time for which - * scif_poll() will block, in milliseconds. Specifying a negative value in - * timeout means an infinite timeout. - * - * The following bits may be set in events and returned in revents. - * EPOLLIN - Data may be received without blocking. For a connected - * endpoint, this means that scif_recv() may be called without blocking. For a - * listening endpoint, this means that scif_accept() may be called without - * blocking. - * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this - * means that scif_send() may be called without blocking. EPOLLOUT may also be - * used to block waiting for a non-blocking connect to complete. This bit value - * has no meaning for a listening endpoint and is ignored if specified. - * - * The following bits are only returned in revents, and are ignored if set in - * events. - * EPOLLERR - An error occurred on the endpoint - * EPOLLHUP - The connection to the peer endpoint was disconnected - * EPOLLNVAL - The specified endpoint descriptor is invalid. - * - * Return: - * Upon successful completion, scif_poll() returns a non-negative value. A - * positive value indicates the total number of endpoint descriptors that have - * been selected (that is, endpoint descriptors for which the revents member is - * non-zero). A value of 0 indicates that the call timed out and no endpoint - * descriptors have been selected. Otherwise in user mode -1 is returned and - * errno is set to indicate the error; in kernel mode the negative of one of - * the following errors is returned. - * - * Errors: - * EINTR - A signal occurred before any requested event - * EINVAL - The nepds argument is greater than {OPEN_MAX} - * ENOMEM - There was no space to allocate file descriptor tables - */ -int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); - -/** - * scif_client_register() - Register a SCIF client - * @client: client to be registered - * - * scif_client_register() registers a SCIF client. The probe() method - * of the client is called when SCIF peer devices come online and the - * remove() method is called when the peer devices disappear. - * - * Return: - * Upon successful completion, scif_client_register() returns a non-negative - * value. Otherwise the return value is the same as subsys_interface_register() - * in the kernel. - */ -int scif_client_register(struct scif_client *client); - -/** - * scif_client_unregister() - Unregister a SCIF client - * @client: client to be unregistered - * - * scif_client_unregister() unregisters a SCIF client. - * - * Return: - * None - */ -void scif_client_unregister(struct scif_client *client); - -#endif /* __SCIF_H__ */ diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h deleted file mode 100644 index 504e523f702c..000000000000 --- a/include/uapi/linux/mic_common.h +++ /dev/null @@ -1,235 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC driver. - * - */ -#ifndef __MIC_COMMON_H_ -#define __MIC_COMMON_H_ - -#include - -#define __mic_align(a, x) (((a) + (x) - 1) & ~((x) - 1)) - -/** - * struct mic_device_desc: Virtio device information shared between the - * virtio driver and userspace backend - * - * @type: Device type: console/network/disk etc. Type 0/-1 terminates. - * @num_vq: Number of virtqueues. - * @feature_len: Number of bytes of feature bits. Multiply by 2: one for - host features and one for guest acknowledgements. - * @config_len: Number of bytes of the config array after virtqueues. - * @status: A status byte, written by the Guest. - * @config: Start of the following variable length config. - */ -struct mic_device_desc { - __s8 type; - __u8 num_vq; - __u8 feature_len; - __u8 config_len; - __u8 status; - __le64 config[0]; -} __attribute__ ((aligned(8))); - -/** - * struct mic_device_ctrl: Per virtio device information in the device page - * used internally by the host and card side drivers. - * - * @vdev: Used for storing MIC vdev information by the guest. - * @config_change: Set to 1 by host when a config change is requested. - * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset. - * @guest_ack: Set to 1 by guest to ack a command. - * @host_ack: Set to 1 by host to ack a command. - * @used_address_updated: Set to 1 by guest when the used address should be - * updated. - * @c2h_vdev_db: The doorbell number to be used by guest. Set by host. - * @h2c_vdev_db: The doorbell number to be used by host. Set by guest. - */ -struct mic_device_ctrl { - __le64 vdev; - __u8 config_change; - __u8 vdev_reset; - __u8 guest_ack; - __u8 host_ack; - __u8 used_address_updated; - __s8 c2h_vdev_db; - __s8 h2c_vdev_db; -} __attribute__ ((aligned(8))); - -/** - * struct mic_bootparam: Virtio device independent information in device page - * - * @magic: A magic value used by the card to ensure it can see the host - * @h2c_config_db: Host to Card Virtio config doorbell set by card - * @node_id: Unique id of the node - * @h2c_scif_db - Host to card SCIF doorbell set by card - * @c2h_scif_db - Card to host SCIF doorbell set by host - * @scif_host_dma_addr - SCIF host queue pair DMA address - * @scif_card_dma_addr - SCIF card queue pair DMA address - */ -struct mic_bootparam { - __le32 magic; - __s8 h2c_config_db; - __u8 node_id; - __u8 h2c_scif_db; - __u8 c2h_scif_db; - __u64 scif_host_dma_addr; - __u64 scif_card_dma_addr; -} __attribute__ ((aligned(8))); - -/** - * struct mic_device_page: High level representation of the device page - * - * @bootparam: The bootparam structure is used for sharing information and - * status updates between MIC host and card drivers. - * @desc: Array of MIC virtio device descriptors. - */ -struct mic_device_page { - struct mic_bootparam bootparam; - struct mic_device_desc desc[0]; -}; -/** - * struct mic_vqconfig: This is how we expect the device configuration field - * for a virtqueue to be laid out in config space. - * - * @address: Guest/MIC physical address of the virtio ring - * (avail and desc rings) - * @used_address: Guest/MIC physical address of the used ring - * @num: The number of entries in the virtio_ring - */ -struct mic_vqconfig { - __le64 address; - __le64 used_address; - __le16 num; -} __attribute__ ((aligned(8))); - -/* - * The alignment to use between consumer and producer parts of vring. - * This is pagesize for historical reasons. - */ -#define MIC_VIRTIO_RING_ALIGN 4096 - -#define MIC_MAX_VRINGS 4 -#define MIC_VRING_ENTRIES 128 - -/* - * Max vring entries (power of 2) to ensure desc and avail rings - * fit in a single page - */ -#define MIC_MAX_VRING_ENTRIES 128 - -/** - * Max size of the desc block in bytes: includes: - * - struct mic_device_desc - * - struct mic_vqconfig (num_vq of these) - * - host and guest features - * - virtio device config space - */ -#define MIC_MAX_DESC_BLK_SIZE 256 - -/** - * struct _mic_vring_info - Host vring info exposed to userspace backend - * for the avail index and magic for the card. - * - * @avail_idx: host avail idx - * @magic: A magic debug cookie. - */ -struct _mic_vring_info { - __u16 avail_idx; - __le32 magic; -}; - -/** - * struct mic_vring - Vring information. - * - * @vr: The virtio ring. - * @info: Host vring information exposed to the userspace backend for the - * avail index and magic for the card. - * @va: The va for the buffer allocated for vr and info. - * @len: The length of the buffer required for allocating vr and info. - */ -struct mic_vring { - struct vring vr; - struct _mic_vring_info *info; - void *va; - int len; -}; - -#define mic_aligned_desc_size(d) __mic_align(mic_desc_size(d), 8) - -#ifndef INTEL_MIC_CARD -static inline unsigned mic_desc_size(const struct mic_device_desc *desc) -{ - return sizeof(*desc) + desc->num_vq * sizeof(struct mic_vqconfig) - + desc->feature_len * 2 + desc->config_len; -} - -static inline struct mic_vqconfig * -mic_vq_config(const struct mic_device_desc *desc) -{ - return (struct mic_vqconfig *)(desc + 1); -} - -static inline __u8 *mic_vq_features(const struct mic_device_desc *desc) -{ - return (__u8 *)(mic_vq_config(desc) + desc->num_vq); -} - -static inline __u8 *mic_vq_configspace(const struct mic_device_desc *desc) -{ - return mic_vq_features(desc) + desc->feature_len * 2; -} -static inline unsigned mic_total_desc_size(struct mic_device_desc *desc) -{ - return mic_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl); -} -#endif - -/* Device page size */ -#define MIC_DP_SIZE 4096 - -#define MIC_MAGIC 0xc0ffee00 - -/** - * enum mic_states - MIC states. - */ -enum mic_states { - MIC_READY = 0, - MIC_BOOTING, - MIC_ONLINE, - MIC_SHUTTING_DOWN, - MIC_RESETTING, - MIC_RESET_FAILED, - MIC_LAST -}; - -/** - * enum mic_status - MIC status reported by card after - * a host or card initiated shutdown or a card crash. - */ -enum mic_status { - MIC_NOP = 0, - MIC_CRASHED, - MIC_HALTED, - MIC_POWER_OFF, - MIC_RESTART, - MIC_STATUS_LAST -}; - -#endif diff --git a/include/uapi/linux/mic_ioctl.h b/include/uapi/linux/mic_ioctl.h deleted file mode 100644 index 687b9cd9d3e2..000000000000 --- a/include/uapi/linux/mic_ioctl.h +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC Host driver. - * - */ -#ifndef _MIC_IOCTL_H_ -#define _MIC_IOCTL_H_ - -#include - -/* - * mic_copy - MIC virtio descriptor copy. - * - * @iov: An array of IOVEC structures containing user space buffers. - * @iovcnt: Number of IOVEC structures in iov. - * @vr_idx: The vring index. - * @update_used: A non zero value results in used index being updated. - * @out_len: The aggregate of the total length written to or read from - * the virtio device. - */ -struct mic_copy_desc { -#ifdef __KERNEL__ - struct iovec __user *iov; -#else - struct iovec *iov; -#endif - __u32 iovcnt; - __u8 vr_idx; - __u8 update_used; - __u32 out_len; -}; - -/* - * Add a new virtio device - * The (struct mic_device_desc *) pointer points to a device page entry - * for the virtio device consisting of: - * - struct mic_device_desc - * - struct mic_vqconfig (num_vq of these) - * - host and guest features - * - virtio device config space - * The total size referenced by the pointer should equal the size returned - * by desc_size() in mic_common.h - */ -#define MIC_VIRTIO_ADD_DEVICE _IOWR('s', 1, struct mic_device_desc *) - -/* - * Copy the number of entries in the iovec and update the used index - * if requested by the user. - */ -#define MIC_VIRTIO_COPY_DESC _IOWR('s', 2, struct mic_copy_desc *) - -/* - * Notify virtio device of a config change - * The (__u8 *) pointer points to config space values for the device - * as they should be written into the device page. The total size - * referenced by the pointer should equal the config_len field of struct - * mic_device_desc. - */ -#define MIC_VIRTIO_CONFIG_CHANGE _IOWR('s', 5, __u8 *) - -#endif -- cgit From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Oct 2020 17:35:46 +0200 Subject: cpufreq: Introduce cpufreq_driver_test_flags() Add a helper function to test the flags of the cpufreq driver in use againt a given flags mask. In particular, this will be needed to test the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil governor. Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 038ed83aab41..1eaa04f1bae6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -433,6 +433,7 @@ struct cpufreq_driver { int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); +bool cpufreq_driver_test_flags(u16 flags); const char *cpufreq_get_current_driver(void); void *cpufreq_get_driver_data(void); -- cgit From 185f0c7073bd5c78f86265f703f5daf1306ab5a7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 26 Oct 2020 13:22:47 +0000 Subject: afs: Wrap page->private manipulations in inline functions The afs filesystem uses page->private to store the dirty range within a page such that in the event of a conflicting 3rd-party write to the server, we write back just the bits that got changed locally. However, there are a couple of problems with this: (1) I need a bit to note if the page might be mapped so that partial invalidation doesn't shrink the range. (2) There aren't necessarily sufficient bits to store the entire range of data altered (say it's a 32-bit system with 64KiB pages or transparent huge pages are in use). So wrap the accesses in inline functions so that future commits can change how this works. Also move them out of the tracing header into the in-directory header. There's not really any need for them to be in the tracing header. Signed-off-by: David Howells --- include/trace/events/afs.h | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 8eb49231c6bb..866fc67d5aa5 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -966,19 +966,6 @@ TRACE_EVENT(afs_dir_check_failed, __entry->vnode, __entry->off, __entry->i_size) ); -/* - * We use page->private to hold the amount of the page that we've written to, - * splitting the field into two parts. However, we need to represent a range - * 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system. - */ -#if PAGE_SIZE > 32768 -#define AFS_PRIV_MAX 0xffffffff -#define AFS_PRIV_SHIFT 32 -#else -#define AFS_PRIV_MAX 0xffff -#define AFS_PRIV_SHIFT 16 -#endif - TRACE_EVENT(afs_page_dirty, TP_PROTO(struct afs_vnode *vnode, const char *where, pgoff_t page, unsigned long priv), @@ -999,10 +986,10 @@ TRACE_EVENT(afs_page_dirty, __entry->priv = priv; ), - TP_printk("vn=%p %lx %s %lu-%lu", + TP_printk("vn=%p %lx %s %zx-%zx", __entry->vnode, __entry->page, __entry->where, - __entry->priv & AFS_PRIV_MAX, - __entry->priv >> AFS_PRIV_SHIFT) + afs_page_dirty_from(__entry->priv), + afs_page_dirty_to(__entry->priv)) ); TRACE_EVENT(afs_call_state, -- cgit From f86726a69dec5df6ba051baf9265584419478b64 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Oct 2020 14:08:23 +0100 Subject: afs: Fix afs_invalidatepage to adjust the dirty region Fix afs_invalidatepage() to adjust the dirty region recorded in page->private when truncating a page. If the dirty region is entirely removed, then the private data is cleared and the page dirty state is cleared. Without this, if the page is truncated and then expanded again by truncate, zeros from the expanded, but no-longer dirty region may get written back to the server if the page gets laundered due to a conflicting 3rd-party write. It mustn't, however, shorten the dirty region of the page if that page is still mmapped and has been marked dirty by afs_page_mkwrite(), so a flag is stored in page->private to record this. Fixes: 4343d00872e1 ("afs: Get rid of the afs_writeback record") Signed-off-by: David Howells --- include/trace/events/afs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 866fc67d5aa5..4eef374d4413 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -986,10 +986,11 @@ TRACE_EVENT(afs_page_dirty, __entry->priv = priv; ), - TP_printk("vn=%p %lx %s %zx-%zx", + TP_printk("vn=%p %lx %s %zx-%zx%s", __entry->vnode, __entry->page, __entry->where, afs_page_dirty_from(__entry->priv), - afs_page_dirty_to(__entry->priv)) + afs_page_dirty_to(__entry->priv), + afs_is_page_dirty_mmapped(__entry->priv) ? " M" : "") ); TRACE_EVENT(afs_call_state, -- cgit From e5e1a4bc916d29958c3b587354293738fcb984d7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 27 Oct 2020 13:32:01 +0100 Subject: xsk: Fix possible memory leak at socket close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible memory leak at xsk socket close that is caused by the refcounting of the umem object being wrong. The reference count of the umem was decremented only after the pool had been freed. Note that if the buffer pool is destroyed, it is important that the umem is destroyed after the pool, otherwise the umem would disappear while the driver is still running. And as the buffer pool needs to be destroyed in a work queue, the umem is also (if its refcount reaches zero) destroyed after the buffer pool in that same work queue. What was missing is that the refcount also needs to be decremented when the pool is not freed and when the pool has not even been created. The first case happens when the refcount of the pool is higher than 1, i.e. it is still being used by some other socket using the same device and queue id. In this case, it is safe to decrement the refcount of the umem outside of the work queue as the umem will never be freed because the refcount of the umem is always greater than or equal to the refcount of the buffer pool. The second case is if the buffer pool has not been created yet, i.e. the socket was closed before it was bound but after the umem was created. In this case, it is safe to destroy the umem outside of the work queue, since there is no pool that can use it by definition. Fixes: 1c1efc2af158 ("xsk: Create and free buffer pool independently from umem") Reported-by: syzbot+eb71df123dc2be2c1456@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1603801921-2712-1-git-send-email-magnus.karlsson@gmail.com --- include/net/xsk_buff_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 0140d086dc84..01755b838c74 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -86,7 +86,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, void xp_destroy(struct xsk_buff_pool *pool); void xp_release(struct xdp_buff_xsk *xskb); void xp_get_pool(struct xsk_buff_pool *pool); -void xp_put_pool(struct xsk_buff_pool *pool); +bool xp_put_pool(struct xsk_buff_pool *pool); void xp_clear_dev(struct xsk_buff_pool *pool); void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); -- cgit From 4169e889e5889405d54cec27d6e9f7f0ce3c7096 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 2 Sep 2020 23:25:55 -0500 Subject: include: jhash/signal: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, explicitly add break statements instead of letting the code fall through to the next case. This patch adds four break statements that, together, fix almost 40,000 warnings when building Linux 5.10-rc1 with Clang 12.0.0 and this[1] change reverted. Notice that in order to enable -Wimplicit-fallthrough for Clang, such change[1] is meant to be reverted at some point. So, this patch helps to move in that direction. Something important to mention is that there is currently a discrepancy between GCC and Clang when dealing with switch fall-through to empty case statements or to cases that only contain a break/continue/return statement[2][3][4]. Now that the -Wimplicit-fallthrough option has been globally enabled[5], any compiler should really warn on missing either a fallthrough annotation or any of the other case-terminating statements (break/continue/return/ goto) when falling through to the next case statement. Making exceptions to this introduces variation in case handling which may continue to lead to bugs, misunderstandings, and a general lack of robustness. The point of enabling options like -Wimplicit-fallthrough is to prevent human error and aid developers in spotting bugs before their code is even built/ submitted/committed, therefore eliminating classes of bugs. So, in order to really accomplish this, we should, and can, move in the direction of addressing any error-prone scenarios and get rid of the unintentional fallthrough bug-class in the kernel, entirely, even if there is some minor redundancy. Better to have explicit case-ending statements than continue to have exceptions where one must guess as to the right result. The compiler will eliminate any actual redundancy. [1] commit e2079e93f562c ("kbuild: Do not enable -Wimplicit-fallthrough for clang for now") [2] https://github.com/ClangBuiltLinux/linux/issues/636 [3] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91432 [4] https://godbolt.org/z/xgkvIh [5] commit a035d552a93b ("Makefile: Globally enable fall-through warning") Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/jhash.h | 2 ++ include/linux/signal.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index cfb62e9f37be..ab7f8c152b89 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -99,6 +99,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) case 2: a += (u32)k[1]<<8; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); + break; case 0: /* Nothing left to add */ break; } @@ -136,6 +137,7 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) case 2: b += k[1]; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); + break; case 0: /* Nothing left to add */ break; } diff --git a/include/linux/signal.h b/include/linux/signal.h index 7bbc0e9cf084..b256f9c65661 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -238,6 +238,7 @@ static inline void siginitset(sigset_t *set, unsigned long mask) memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = 0; + break; case 1: ; } } @@ -250,6 +251,7 @@ static inline void siginitsetinv(sigset_t *set, unsigned long mask) memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1)); break; case 2: set->sig[1] = -1; + break; case 1: ; } } -- cgit From a4147d855f50a676ebe61833a681f7c71945f343 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:18:04 -0500 Subject: dmaengine: ti-cppi5: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/dma/ti-cppi5.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index 5896441ee604..efa2f0309f00 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -47,7 +47,7 @@ struct cppi5_host_desc_t { u32 buf_info1; u32 org_buf_len; u64 org_buf_ptr; - u32 epib[0]; + u32 epib[]; } __packed; #define CPPI5_DESC_MIN_ALIGN (16U) @@ -139,7 +139,7 @@ struct cppi5_desc_epib_t { */ struct cppi5_monolithic_desc_t { struct cppi5_desc_hdr_t hdr; - u32 epib[0]; + u32 epib[]; }; #define CPPI5_INFO2_MDESC_DATA_OFFSET_SHIFT (18U) -- cgit From 277ffd6c1ec0aa60856a03e18455fcca7d2a1186 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:19:18 -0500 Subject: mailbox: zynqmp-ipi-message: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/mailbox/zynqmp-ipi-message.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h index 9542b41eacfd..35ce84c8ca02 100644 --- a/include/linux/mailbox/zynqmp-ipi-message.h +++ b/include/linux/mailbox/zynqmp-ipi-message.h @@ -14,7 +14,7 @@ */ struct zynqmp_ipi_message { size_t len; - u8 data[0]; + u8 data[]; }; #endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */ -- cgit From 883541051567a62add043a9f4ca5a31f2970bffd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:21:14 -0500 Subject: platform/chrome: cros_ec_commands: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/platform_data/cros_ec_commands.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 1fcfe9e63cb9..a3a9a878415f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1419,7 +1419,7 @@ struct ec_response_flash_info_2 { uint16_t num_banks_total; /* Number of banks described in banks array. */ uint16_t num_banks_desc; - struct ec_flash_bank banks[0]; + struct ec_flash_bank banks[]; } __ec_align4; /* @@ -2420,12 +2420,12 @@ struct ec_response_motion_sense_fifo_info { /* Total amount of vector lost */ uint16_t total_lost; /* Lost events since the last fifo_info, per sensors */ - uint16_t lost[0]; + uint16_t lost[]; } __ec_todo_packed; struct ec_response_motion_sense_fifo_data { uint32_t number_data; - struct ec_response_motion_sensor_data data[0]; + struct ec_response_motion_sensor_data data[]; } __ec_todo_packed; /* List supported activity recognition */ @@ -3093,7 +3093,7 @@ struct ec_response_tmp006_get_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved[2]; - float val[0]; + float val[]; } __ec_align4; struct ec_params_tmp006_set_calibration_v1 { @@ -3101,7 +3101,7 @@ struct ec_params_tmp006_set_calibration_v1 { uint8_t algorithm; uint8_t num_params; uint8_t reserved; - float val[0]; + float val[]; } __ec_align4; @@ -5076,7 +5076,7 @@ struct ec_response_pd_log { uint8_t type; /* event type : see PD_EVENT_xx below */ uint8_t size_port; /* [7:5] port number [4:0] payload size in bytes */ uint16_t data; /* type-defined data payload */ - uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */ + uint8_t payload[]; /* optional additional data payload: 0..16 bytes */ } __ec_align4; /* The timestamp is the microsecond counter shifted to get about a ms. */ @@ -5789,7 +5789,7 @@ struct ec_response_fp_encryption_status { struct ec_response_tp_frame_info { uint32_t n_frames; - uint32_t frame_sizes[0]; + uint32_t frame_sizes[]; } __ec_align4; /* Create a snapshot of current frame readings */ -- cgit From 120088832042e6dc9866160ff267f8c347bf53e6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 10:21:55 -0500 Subject: platform/chrome: cros_ec_proto: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 4a415ae851ef..02599687770c 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -69,7 +69,7 @@ struct cros_ec_command { uint32_t outsize; uint32_t insize; uint32_t result; - uint8_t data[0]; + uint8_t data[]; }; /** -- cgit From 5e01fdff04b7f7c3b8d456c11c8a9f978b4ddf65 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 31 Aug 2020 08:25:42 -0500 Subject: fs: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..21cc971fd960 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3285,7 +3285,7 @@ static inline ino_t parent_ino(struct dentry *dentry) */ struct simple_transaction_argresp { ssize_t size; - char data[0]; + char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) -- cgit From 080b6f40763565f65ebb9540219c71ce885cf568 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 18:15:05 +0100 Subject: bpf: Don't rely on GCC __attribute__((optimize)) to disable GCSE Commit 3193c0836 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") introduced a __no_fgcse macro that expands to a function scope __attribute__((optimize("-fno-gcse"))), to disable a GCC specific optimization that was causing trouble on x86 builds, and was not expected to have any positive effect in the first place. However, as the GCC manual documents, __attribute__((optimize)) is not for production use, and results in all other optimization options to be forgotten for the function in question. This can cause all kinds of trouble, but in one particular reported case, it causes -fno-asynchronous-unwind-tables to be disregarded, resulting in .eh_frame info to be emitted for the function. This reverts commit 3193c0836, and instead, it disables the -fgcse optimization for the entire source file, but only when building for X86 using GCC with CONFIG_BPF_JIT_ALWAYS_ON disabled. Note that the original commit states that CONFIG_RETPOLINE=n triggers the issue, whereas CONFIG_RETPOLINE=y performs better without the optimization, so it is kept disabled in both cases. Fixes: 3193c0836f20 ("bpf: Disable GCC -fgcse optimization for ___bpf_prog_run()") Signed-off-by: Ard Biesheuvel Signed-off-by: Alexei Starovoitov Tested-by: Geert Uytterhoeven Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/lkml/CAMuHMdUg0WJHEcq6to0-eODpXPOywLot6UD2=GFHpzoj_hCoBQ@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20201028171506.15682-2-ardb@kernel.org --- include/linux/compiler-gcc.h | 2 -- include/linux/compiler_types.h | 4 ---- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d1e3c6896b71..5deb37024574 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -175,5 +175,3 @@ #else #define __diag_GCC_8(s) #endif - -#define __no_fgcse __attribute__((optimize("-fno-gcse"))) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6e390d58a9f8..ac3fa37a84f9 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -247,10 +247,6 @@ struct ftrace_likely_data { #define asm_inline asm #endif -#ifndef __no_fgcse -# define __no_fgcse -#endif - /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit From 0d519cbf38eed4f895aed197d4b135fa7f60f7c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Oct 2020 15:10:37 +0200 Subject: debugfs: remove return value of debugfs_create_devm_seqfile() No one checks the return value of debugfs_create_devm_seqfile(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20201023131037.2500765-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 851dd1f9a8a5..d6c4cc9ecc77 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -144,10 +144,9 @@ void debugfs_create_u32_array(const char *name, umode_t mode, struct dentry *parent, struct debugfs_u32_array *array); -struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, - struct dentry *parent, - int (*read_fn)(struct seq_file *s, - void *data)); +void debugfs_create_devm_seqfile(struct device *dev, const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, void *data)); bool debugfs_initialized(void); @@ -327,13 +326,12 @@ static inline void debugfs_create_u32_array(const char *name, umode_t mode, { } -static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, - const char *name, - struct dentry *parent, - int (*read_fn)(struct seq_file *s, - void *data)) +static inline void debugfs_create_devm_seqfile(struct device *dev, + const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, + void *data)) { - return ERR_PTR(-ENODEV); } static inline ssize_t debugfs_read_file_bool(struct file *file, -- cgit From b1e8eb11fb9cf666d8ae36bbcf533233a504c921 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:08 +0200 Subject: mac80211: fix kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. Others need to be fixed, as kernel-doc markups should use this format: identifier - description In the specific case of __sta_info_flush(), add a documentation for sta_info_flush(), as this one is the one used outside sta_info.c. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Johannes Berg Link: https://lore.kernel.org/r/978d35eef2dc76e21c81931804e4eaefbd6d635e.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++---- include/net/mac80211.h | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 661edfc8722e..d5ab8d99739f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1444,7 +1444,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, enum cfg80211_station_type statype); /** - * enum station_info_rate_flags - bitrate info flags + * enum rate_info_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -1517,7 +1517,7 @@ struct rate_info { }; /** - * enum station_info_rate_flags - bitrate info flags + * enum bss_param_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -6467,7 +6467,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp); /** - * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate + * cfg80211_notify_new_peer_candidate - notify cfg80211 of a new mesh peer + * candidate * * @dev: network device * @macaddr: the MAC address of the new candidate @@ -7606,7 +7607,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); void cfg80211_unregister_wdev(struct wireless_dev *wdev); /** - * struct cfg80211_ft_event - FT Information Elements + * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs * @ies_len: length of the FT IE in bytes * @target_ap: target AP's MAC address diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e8e295dae744..dcdba96814a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3311,7 +3311,7 @@ enum ieee80211_roc_type { }; /** - * enum ieee80211_reconfig_complete_type - reconfig type + * enum ieee80211_reconfig_type - reconfig type * * This enum is used by the reconfig_complete() callback to indicate what * reconfiguration type was completed. @@ -6334,7 +6334,8 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, int band, struct ieee80211_sta **sta); /** - * Sanity-check and parse the radiotap header of injected frames + * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header + * of injected frames * @skb: packet injected by userspace * @dev: the &struct device of this 802.11 device */ @@ -6389,7 +6390,7 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf); /** - * ieee80211_tdls_oper - request userspace to perform a TDLS operation + * ieee80211_tdls_oper_request - request userspace to perform a TDLS operation * @vif: virtual interface * @peer: the peer's destination address * @oper: the requested TDLS operation -- cgit From 46d6c5ae953cc0be38efd0e469284df7c4328cf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:06 +0100 Subject: netfilter: use actual socket sk rather than skb sk when routing harder If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit(): /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing. One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes. So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 082e2c41b7ff..5b70ca868bb1 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 9b67394471e1..48314ade1506 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -42,7 +42,7 @@ struct nf_ipv6_ops { #if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, #endif } -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) if (!v6_ops) return -EHOSTUNREACH; - return v6_ops->route_me_harder(net, skb); + return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif -- cgit From c0391b6ab810381df632677a1dcbbbbd63d05b6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 Oct 2020 13:50:03 +0100 Subject: netfilter: nf_tables: missing validation from the abort path If userspace does not include the trailing end of batch message, then nfnetlink aborts the transaction. This allows to check that ruleset updates trigger no errors. After this patch, invoking this command from the prerouting chain: # nft -c add rule x y fib saddr . oif type local fails since oif is not supported there. This patch fixes the lack of rule validation from the abort/check path to catch configuration errors such as the one above. Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 89016d08f6a2..f6267e2883f2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -24,6 +24,12 @@ struct nfnl_callback { const u_int16_t attr_count; /* number of nlattr's */ }; +enum nfnl_abort_action { + NFNL_ABORT_NONE = 0, + NFNL_ABORT_AUTOLOAD, + NFNL_ABORT_VALIDATE, +}; + struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ @@ -31,7 +37,8 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); - int (*abort)(struct net *net, struct sk_buff *skb, bool autoload); + int (*abort)(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action); void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; -- cgit From 290562075d4d9e85b7ff4104f9a634ffc3cccb69 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 27 Oct 2020 15:28:40 -0500 Subject: net/mlx5: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays Signed-off-by: Gustavo A. R. Silva --- include/linux/mlx5/mlx5_ifc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 651591a2965d..a092346c7b2d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5823,7 +5823,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits { u8 reserved_at_68[0x10]; u8 num_of_actions[0x8]; - union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0]; + union mlx5_ifc_set_add_copy_action_in_auto_bits actions[]; }; struct mlx5_ifc_dealloc_modify_header_context_out_bits { @@ -9761,7 +9761,7 @@ struct mlx5_ifc_mcda_reg_bits { u8 reserved_at_60[0x20]; - u8 data[0][0x20]; + u8 data[][0x20]; }; enum { -- cgit From b59e286be280fa3c2e94a0716ddcee6ba02bc8ba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 27 Oct 2020 20:33:12 +0800 Subject: ICMPv6: Add ICMPv6 Parameter Problem, code 3 definition Based on RFC7112, Section 6: IANA has added the following "Type 4 - Parameter Problem" message to the "Internet Control Message Protocol version 6 (ICMPv6) Parameters" registry: CODE NAME/DESCRIPTION 3 IPv6 First Fragment has incomplete IPv6 Header Chain Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- include/uapi/linux/icmpv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index c1661febc2dc..0564fd7ccde4 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -138,6 +138,7 @@ struct icmp6hdr { #define ICMPV6_HDR_FIELD 0 #define ICMPV6_UNK_NEXTHDR 1 #define ICMPV6_UNK_OPTION 2 +#define ICMPV6_HDR_INCOMP 3 /* * constants for (set|get)sockopt -- cgit From f51778db088b2407ec177f2f4da0f6290602aa3f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 2 Nov 2020 12:43:27 +1100 Subject: swiotlb: using SIZE_MAX needs limits.h included After merging the drm-misc tree, linux-next build (arm multi_v7_defconfig) failed like this: In file included from drivers/gpu/drm/nouveau/nouveau_ttm.c:26: include/linux/swiotlb.h: In function 'swiotlb_max_mapping_size': include/linux/swiotlb.h:99:9: error: 'SIZE_MAX' undeclared (first use in this function) 99 | return SIZE_MAX; | ^~~~~~~~ include/linux/swiotlb.h:7:1: note: 'SIZE_MAX' is defined in header ''; did you forget to '#include '? 6 | #include +++ |+#include 7 | #include include/linux/swiotlb.h:99:9: note: each undeclared identifier is reported only once for each function it appears in 99 | return SIZE_MAX; | ^~~~~~~~ Caused by commit abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") but only exposed by commit "drm/nouveu: fix swiotlb include" Fix it by including linux/limits.h as appropriate. Fixes: abe420bfae52 ("swiotlb: Introduce swiotlb_max_mapping_size()") Signed-off-by: Stephen Rothwell Link: https://lore.kernel.org/r/20201102124327.2f82b2a7@canb.auug.org.au Signed-off-by: Michael S. Tsirkin --- include/linux/swiotlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 046bb94bd4d6..fa5122c6711e 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -5,6 +5,7 @@ #include #include #include +#include struct device; struct page; -- cgit From fc0021aa340af65a0a37d77be39e22aa886a6132 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Oct 2020 08:33:09 +0200 Subject: swiotlb: remove the tbl_dma_addr argument to swiotlb_tbl_map_single The tbl_dma_addr argument is used to check the DMA boundary for the allocations, and thus needs to be a dma_addr_t. swiotlb-xen instead passed a physical address, which could lead to incorrect results for strange offsets. Fix this by removing the parameter entirely and hard code the DMA address for io_tlb_start instead. Fixes: 91ffe4ad534a ("swiotlb-xen: introduce phys_to_dma/dma_to_phys translations") Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 513913ff7486..3bb72266a75a 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,13 +45,9 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, }; -extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t phys, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs); +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, -- cgit From e0e398e204634db8fb71bd89cf2f6e3e5bd09b51 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:12:15 +0200 Subject: PM: runtime: Drop runtime PM references to supplier on link removal While removing a device link, drop the supplier device's runtime PM usage counter as many times as needed to drop all of the runtime PM references to it from the consumer in addition to dropping the consumer's link count. Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 18b02dcc168e..eadc1fdebce6 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,7 +58,7 @@ extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); -extern void pm_runtime_drop_link(struct device *dev); +extern void pm_runtime_drop_link(struct device_link *link); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. @@ -280,7 +280,7 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -static inline void pm_runtime_drop_link(struct device *dev) {} +static inline void pm_runtime_drop_link(struct device_link *link) {} #endif /* !CONFIG_PM */ -- cgit From d6e36668598154820177bfd78c1621d8e6c580a2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Oct 2020 21:13:10 +0200 Subject: PM: runtime: Drop pm_runtime_clean_up_links() After commit d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") nothing prevents the consumer device's runtime PM from acquiring additional references to the supplier device after pm_runtime_clean_up_links() has run (or even while it is running), so calling this function from __device_release_driver() may be pointless (or even harmful). Moreover, it ignores stateless device links, so the runtime PM handling of managed and stateless device links is inconsistent because of it, so better get rid of it entirely. Fixes: d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()") Signed-off-by: Rafael J. Wysocki Cc: 5.1+ # 5.1+ Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman --- include/linux/pm_runtime.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index eadc1fdebce6..4b708f4e8eed 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -54,7 +54,6 @@ extern u64 pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); -extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); @@ -276,7 +275,6 @@ static inline u64 pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} -static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} -- cgit From f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: mm: always have io_remap_pfn_range() set pgprot_decrypted() The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ include/linux/pgtable.h | 4 ---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ef360fe70aaf..db6ae4d3fb4e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 38c33eabea89..71125a4676c4 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit From 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 18 Dec 2019 09:39:02 +0100 Subject: can: can_create_echo_skb(): fix echo skb generation: always use skb_clone() All user space generated SKBs are owned by a socket (unless injected into the key via AF_PACKET). If a socket is closed, all associated skbs will be cleaned up. This leads to a problem when a CAN driver calls can_put_echo_skb() on a unshared SKB. If the socket is closed prior to the TX complete handler, can_get_echo_skb() and the subsequent delivering of the echo SKB to all registered callbacks, a SKB with a refcount of 0 is delivered. To avoid the problem, in can_get_echo_skb() the original SKB is now always cloned, regardless of shared SKB or not. If the process exists it can now safely discard its SKBs, without disturbing the delivery of the echo SKB. The problem shows up in the j1939 stack, when it clones the incoming skb, which detects the already 0 refcount. We can easily reproduce this with following example: testj1939 -B -r can0: & cansend can0 1823ff40#0123 WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174 refcount_t: addition on 0; use-after-free. Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x8c/0xa0) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0xa8/0xcc) [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x108/0x174) [] (refcount_warn_saturate) from [] (j1939_can_recv+0x20c/0x210) [] (j1939_can_recv) from [] (can_rcv_filter+0xb4/0x268) [] (can_rcv_filter) from [] (can_receive+0xb0/0xe4) [] (can_receive) from [] (can_rcv+0x48/0x98) [] (can_rcv) from [] (__netif_receive_skb_one_core+0x64/0x88) [] (__netif_receive_skb_one_core) from [] (__netif_receive_skb+0x38/0x94) [] (__netif_receive_skb) from [] (netif_receive_skb_internal+0x64/0xf8) [] (netif_receive_skb_internal) from [] (netif_receive_skb+0x34/0x19c) [] (netif_receive_skb) from [] (can_rx_offload_napi_poll+0x58/0xb4) Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Signed-off-by: Oleksij Rempel Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 900b9f4e0605..fc61cf4eff1c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ -- cgit From 763e4cdc0f6d5cea45c896fef67f7be4bdefcca7 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 29 Oct 2020 14:30:48 -0700 Subject: iomap: support partial page discard on writeback block mapping failure iomap writeback mapping failure only calls into ->discard_page() if the current page has not been added to the ioend. Accordingly, the XFS callback assumes a full page discard and invalidation. This is problematic for sub-page block size filesystems where some portion of a page might have been mapped successfully before a failure to map a delalloc block occurs. ->discard_page() is not called in that error scenario and the bio is explicitly failed by iomap via the error return from ->prepare_ioend(). As a result, the filesystem leaks delalloc blocks and corrupts the filesystem block counters. Since XFS is the only user of ->discard_page(), tweak the semantics to invoke the callback unconditionally on mapping errors and provide the file offset that failed to map. Update xfs_discard_page() to discard the corresponding portion of the file and pass the range along to iomap_invalidatepage(). The latter already properly handles both full and sub-page scenarios by not changing any iomap or page state on sub-page invalidations. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 172b3397a1a3..5bd3cac4df9c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -221,7 +221,7 @@ struct iomap_writeback_ops { * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ - void (*discard_page)(struct page *page); + void (*discard_page)(struct page *page, loff_t fileoff); }; struct iomap_writepage_ctx { -- cgit From fdaf083cdfb556a45c422c8998268baf1ab26829 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Oct 2020 09:37:30 -0600 Subject: io_uring: properly handle SQPOLL request cancelations Track if a given task io_uring context contains SQPOLL instances, so we can iterate those for cancelation (and request counts). This ensures that we properly wait on SQPOLL contexts, and find everything that needs canceling. Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 868364cea3b7..35b2d845704d 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -30,7 +30,8 @@ struct io_uring_task { struct percpu_counter inflight; struct io_identity __identity; struct io_identity *identity; - bool in_idle; + atomic_t in_idle; + bool sqpoll; }; #if defined(CONFIG_IO_URING) -- cgit From 93bd813c17763177cf87e96c2313bd4dd747d234 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Thu, 5 Nov 2020 11:08:04 +0800 Subject: ASoC: rt1015: add delay to fix pop noise from speaker Add delay to fix pop noise from speaker. Signed-off-by: Jack Yu Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20201105030804.31115-1-jack.yu@realtek.com Signed-off-by: Mark Brown --- include/sound/rt1015.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/sound/rt1015.h (limited to 'include') diff --git a/include/sound/rt1015.h b/include/sound/rt1015.h new file mode 100644 index 000000000000..70a7538d4c89 --- /dev/null +++ b/include/sound/rt1015.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/sound/rt1015.h -- Platform data for RT1015 + * + * Copyright 2020 Realtek Microelectronics + */ + +#ifndef __LINUX_SND_RT1015_H +#define __LINUX_SND_RT1015_H + +struct rt1015_platform_data { + unsigned int power_up_delay_ms; +}; + +#endif -- cgit From d321ff589c16d8c2207485a6d7fbdb14e873d46e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Oct 2020 10:41:07 -0400 Subject: SUNRPC: Fix general protection fault in trace_rpc_xdr_overflow() The TP_fast_assign() section is careful enough not to dereference xdr->rqst if it's NULL. The TP_STRUCT__entry section is not. Fixes: 5582863f450c ("SUNRPC: Add XDR overflow trace event") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f45b3c01370c..2477014e3fa6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -655,10 +655,10 @@ TRACE_EVENT(rpc_xdr_overflow, __field(size_t, tail_len) __field(unsigned int, page_len) __field(unsigned int, len) - __string(progname, - xdr->rqst->rq_task->tk_client->cl_program->name) - __string(procedure, - xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) + __string(progname, xdr->rqst ? + xdr->rqst->rq_task->tk_client->cl_program->name : "unknown") + __string(procedure, xdr->rqst ? + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name : "unknown") ), TP_fast_assign( -- cgit From d4d50710a8b46082224376ef119a4dbb75b25c56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Nov 2020 09:27:33 +0100 Subject: seq_file: add seq_read_iter iov_iter based variant for reading a seq_file. seq_read is reimplemented on top of the iter variant. Signed-off-by: Christoph Hellwig Tested-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 813614d4b71f..b83b3ae3c877 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -107,6 +107,7 @@ void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); +ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); -- cgit From b21ebf143af219207214c79bc217beb39c43212a Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:51 -0800 Subject: ext4: mark fc ineligible if inode gets evictied due to mem pressure If inode gets evicted due to memory pressure, we have to remove it from the fast commit list. However, that inode may have uncommitted changes that fast commits will lose. So, just fall back to full commits in this case. Also, rename the fast commit ineligiblity reason from "EXT4_FC_REASON_MEM" to "EXT4_FC_REASON_MEM_NOMEM" for better expression. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-3-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b14314fcf732..0f899d3b09d3 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -100,7 +100,7 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { EXT4_FC_REASON_XATTR, "XATTR"}, \ { EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \ { EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \ - { EXT4_FC_REASON_MEM, "NO_MEM"}, \ + { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \ { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ @@ -2917,13 +2917,13 @@ TRACE_EVENT(ext4_fc_stats, ), TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; " + "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " "num_commits:%ld, ineligible: %ld, numblks: %ld", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME), FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE), - FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM), + FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM), FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT), FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), -- cgit From ede7dc7fa0af619afc08995776eadb9ff3b0a711 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:54 -0800 Subject: jbd2: rename j_maxlen to j_total_len and add jbd2_journal_max_txn_bufs The on-disk superblock field sb->s_maxlen represents the total size of the journal including the fast commit area and is no more the max number of blocks available for a transaction. The maximum number of blocks available to a transaction is reduced by the number of fast commit blocks. So, this patch renames j_maxlen to j_total_len to better represent its intent. Also, it adds a function to calculate max number of bufs available for a transaction. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-6-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1d5566af48ac..e0b6b53eae64 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -988,9 +988,9 @@ struct journal_s struct block_device *j_fs_dev; /** - * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_total_len: Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; + unsigned int j_total_len; /** * @j_reserved_credits: @@ -1624,6 +1624,11 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); +static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) +{ + return (journal->j_total_len - journal->j_fc_wbufsize) / 4; +} + /* * is_journal_abort * -- cgit From a1e5e465b31d6015fccb359d99053b39e5180466 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:55 -0800 Subject: ext4: clean up the JBD2 API that initializes fast commits This patch removes jbd2_fc_init() API and its related functions to simplify enabling fast commits. With this change, the number of fast commit blocks to use is solely determined by the JBD2 layer. So, we move the default value for minimum number of fast commit blocks from ext4/fast_commit.h to include/linux/jbd2.h. However, whether or not to use fast commits is determined by the file system. The file system just sets the fast commit feature using jbd2_journal_set_features(). JBD2 layer then determines how many blocks to use for fast commits (based on the value found in the JBD2 superblock). Note that the JBD2 feature flag of fast commits is just an indication that there are fast commit blocks present on disk. It doesn't tell JBD2 layer about the intent of the file system of whether to it wants to use fast commit or not. That's why, we blindly clear the fast commit flag in journal_reset() after the recovery is done. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-7-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e0b6b53eae64..b2caf7bbd8e5 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags); extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_FC_BLOCKS 256 #ifdef __KERNEL__ @@ -1614,7 +1615,6 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ -int jbd2_fc_init(journal_t *journal, int num_fc_blks); int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); -- cgit From c460e5edc85a063ec9cb60addff93d00ed378701 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:57 -0800 Subject: jbd2: don't use state lock during commit path Variables journal->j_fc_off, journal->j_fc_wbuf are accessed during commit path. Since today we allow only one process to perform a fast commit, there is no need take state lock before accessing these variables. This patch removes these locks and adds comments to describe this. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-9-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b2caf7bbd8e5..5f0ef6380b0c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -945,8 +945,9 @@ struct journal_s /** * @j_fc_off: * - * Number of fast commit blocks currently allocated. - * [j_state_lock]. + * Number of fast commit blocks currently allocated. Accessed only + * during fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ unsigned long j_fc_off; @@ -1109,8 +1110,9 @@ struct journal_s struct buffer_head **j_wbuf; /** - * @j_fc_wbuf: Array of fast commit bhs for - * jbd2_journal_commit_transaction. + * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only + * during a fast commit. Currently only process can do fast commit, so + * this field is not protected by any lock. */ struct buffer_head **j_fc_wbuf; -- cgit From 0bce577bf9cae13ae32d391432d0030e3f67fc1d Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:58:58 -0800 Subject: jbd2: don't pass tid to jbd2_fc_end_commit_fallback() In jbd2_fc_end_commit_fallback(), we know which tid to commit. There's no need for caller to pass it. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201106035911.1942128-10-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5f0ef6380b0c..1c49fd62ff2e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1619,7 +1619,7 @@ extern int jbd2_cleanup_journal_tail(journal_t *); /* Fast commit related APIs */ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); -int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid); +int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); -- cgit From 556e0319fbb8eee3fa19fdcc27c8bcb4af1c7211 Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 5 Nov 2020 19:59:07 -0800 Subject: ext4: disable fast commit with data journalling Fast commits don't work with data journalling. This patch disables the fast commit support when data journalling is turned on. Suggested-by: Jan Kara Signed-off-by: Harshad Shirwadkar Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201106035911.1942128-19-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 0f899d3b09d3..70ae5497b73a 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -104,7 +104,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \ { EXT4_FC_REASON_RESIZE, "RESIZE"}, \ { EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \ - { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}) + { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \ + { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}) TRACE_EVENT(ext4_other_inode_update_time, TP_PROTO(struct inode *inode, ino_t orig_ino), @@ -2917,7 +2918,7 @@ TRACE_EVENT(ext4_fc_stats, ), TP_printk("dev %d:%d fc ineligible reasons:\n" - "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " + "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; " "num_commits:%ld, ineligible: %ld, numblks: %ld", MAJOR(__entry->dev), MINOR(__entry->dev), FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR), @@ -2928,6 +2929,7 @@ TRACE_EVENT(ext4_fc_stats, FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE), FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR), FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE), + FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA), __entry->sbi->s_fc_stats.fc_num_commits, __entry->sbi->s_fc_stats.fc_ineligible_commits, __entry->sbi->s_fc_stats.fc_numblks) -- cgit From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..b775ae0a8c87 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1400,11 +1400,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); -- cgit From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- include/linux/perf_event.h | 6 ------ include/linux/perf_regs.h | 6 ++---- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b775ae0a8c87..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. - */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); -- cgit From 9a2a9ebc0a758d887ee06e067e9f7f0b36ff7574 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:25:57 +0100 Subject: cpufreq: Introduce governor flags A new cpufreq governor flag will be added subsequently, so replace the bool dynamic_switching fleid in struct cpufreq_governor with a flags field and introduce CPUFREQ_GOV_DYNAMIC_SWITCHING to set for the "dynamic switching" governors instead of it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1eaa04f1bae6..9bdfcf3c4748 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -570,12 +570,17 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - /* For governors which change frequency dynamically by themselves */ - bool dynamic_switching; struct list_head governor_list; struct module *owner; + u8 flags; }; +/* Governor flags */ + +/* For governors which change frequency dynamically by themselves */ +#define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) + + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq); -- cgit From 218f66870181bec7aaa6e3c72f346039c590c3c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:10 +0100 Subject: cpufreq: Introduce CPUFREQ_GOV_STRICT_TARGET Introduce a new governor flag, CPUFREQ_GOV_STRICT_TARGET, for the governors that want the target frequency to be set exactly to the given value without leaving any room for adjustments on the hardware side and set this flag for the powersave and performance governors. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9bdfcf3c4748..6eb9a3b8ec7b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -580,6 +580,9 @@ struct cpufreq_governor { /* For governors which change frequency dynamically by themselves */ #define CPUFREQ_GOV_DYNAMIC_SWITCHING BIT(0) +/* For governors wanting the target frequency to be set exactly */ +#define CPUFREQ_GOV_STRICT_TARGET BIT(1) + /* Pass a target to the cpufreq driver */ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, -- cgit From ea9364bbadf11f0c55802cf11387d74f524cee84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Nov 2020 18:26:37 +0100 Subject: cpufreq: Add strict_target to struct cpufreq_policy Add a new field to be set when the CPUFREQ_GOV_STRICT_TARGET flag is set for the current governor to struct cpufreq_policy, so that the drivers needing to check CPUFREQ_GOV_STRICT_TARGET do not have to access the governor object during every frequency transition. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 6eb9a3b8ec7b..acbad3b36322 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -109,6 +109,12 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Set if the CPUFREQ_GOV_STRICT_TARGET flag is set for the current + * governor. + */ + bool strict_target; + /* * Preferred average time interval between consecutive invocations of * the driver to set the frequency for this policy. To be set by the -- cgit From 3084db0e0d5076cd48408274ab0911cd3ccdae88 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Mon, 2 Nov 2020 15:23:04 -0800 Subject: kunit: fix display of failed expectations for strings Currently the following expectation KUNIT_EXPECT_STREQ(test, "hi", "bye"); will produce: Expected "hi" == "bye", but "hi" == 1625079497 "bye" == 1625079500 After this patch: Expected "hi" == "bye", but "hi" == hi "bye" == bye KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() was written but just mistakenly not actually used by KUNIT_EXPECT_STREQ() and friends. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index db1b0ae666c4..df60be7e22ca 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1105,7 +1105,7 @@ do { \ KUNIT_ASSERTION(test, \ strcmp(__left, __right) op 0, \ kunit_binary_str_assert, \ - KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \ + KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \ assert_type, \ #op, \ #left, \ -- cgit From 8a3c84b649b033024d2349f96234b26cbd6083a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: separate __sb_start_write into blocking and non-blocking helpers Break this function into two helpers so that it's obvious that the trylock versions return a value that must be checked, and the blocking versions don't require that. While we're at it, clean up the return type mismatch. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- include/linux/fs.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bd126418bb6..305989afd49c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1581,7 +1581,8 @@ extern struct timespec64 current_time(struct inode *inode); */ void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +void __sb_start_write(struct super_block *sb, int level); +bool __sb_start_write_trylock(struct super_block *sb, int level); #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1646,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1675,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1693,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2757,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) -- cgit From 9b8523423b23ee3dfd88e32f5b7207be56a4e782 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:50:21 -0800 Subject: vfs: move __sb_{start,end}_write* to fs.h Now that we've straightened out the callers, move these three functions to fs.h since they're fairly trivial. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara --- include/linux/fs.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 305989afd49c..6dabd019cab0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,9 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -void __sb_start_write(struct super_block *sb, int level); -bool __sb_start_write_trylock(struct super_block *sb, int level); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. + */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) -- cgit From c3213d260a23e263ef85ba21ac68c9e7578020b5 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 12 Nov 2020 15:17:32 -0500 Subject: SUNRPC: Fix oops in the rpc_xdr_buf event class Backchannel rpc tasks don't have task->tk_client set, so it's necessary to check it for NULL before dereferencing. Fixes: c509f15a5801 ("SUNRPC: Split the xdr_buf event class") Signed-off-by: Scott Mayhew Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2477014e3fa6..2a03263b5f9d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -68,7 +68,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class, TP_fast_assign( __entry->task_id = task->tk_pid; - __entry->client_id = task->tk_client->cl_clid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; -- cgit From 7e890c37c25c7cbca37ff0ab292873d8146e713b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Nov 2020 17:50:04 +0100 Subject: block: add a return value to set_capacity_revalidate_and_notify Return if the function ended up sending an uevent or not. Cc: stable@vger.kernel.org # v5.9 Signed-off-by: Christoph Hellwig Reviewed-by: Petr Vorel Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ -- cgit From 8cf8821e15cd553339a5b48ee555a0439c2b2742 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 12 Nov 2020 20:58:15 -0500 Subject: net: Exempt multicast addresses from five-second neighbor lifetime Commit 58956317c8de ("neighbor: Improve garbage collection") guarantees neighbour table entries a five-second lifetime. Processes which make heavy use of multicast can fill the neighour table with multicast addresses in five seconds. At that point, neighbour entries can't be GC-ed because they aren't five seconds old yet, the kernel log starts to fill up with "neighbor table overflow!" messages, and sends start to fail. This patch allows multicast addresses to be thrown out before they've lived out their five seconds. This makes room for non-multicast addresses and makes messages to all addresses more reliable in these circumstances. Fixes: 58956317c8de ("neighbor: Improve garbage collection") Signed-off-by: Jeff Dike Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20201113015815.31397-1-jdike@akamai.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 81ee17594c32..22ced1381ede 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -204,6 +204,7 @@ struct neigh_table { int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); void (*proxy_redo)(struct sk_buff *skb); + int (*is_multicast)(const void *pkey); bool (*allow_add)(const struct net_device *dev, struct netlink_ext_ack *extack); char *id; -- cgit From 9c2e14b48119b39446031d29d994044ae958d8fc Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 10 Nov 2020 16:16:40 -0800 Subject: ip_tunnels: Set tunnel option flag when tunnel metadata is present Currently, we may set the tunnel option flag when the size of metadata is zero. For example, we set TUNNEL_GENEVE_OPT in the receive function no matter the geneve option is present or not. As this may result in issues on the tunnel flags consumers, this patch fixes the issue. Related discussion: * https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags") Signed-off-by: Yi-Hung Wei Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 02ccd32542d0..61620677b034 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ -- cgit From 3347acc6fcd4ee71ad18a9ff9d9dac176b517329 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 13 Nov 2020 22:51:59 -0800 Subject: compiler.h: fix barrier_data() on clang Commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") neglected to copy barrier_data() from compiler-gcc.h into compiler-clang.h. The definition in compiler-gcc.h was really to work around clang's more aggressive optimization, so this broke barrier_data() on clang, and consequently memzero_explicit() as well. For example, this results in at least the memzero_explicit() call in lib/crypto/sha256.c:sha256_transform() being optimized away by clang. Fix this by moving the definition of barrier_data() into compiler.h. Also move the gcc/clang definition of barrier() into compiler.h, __memory_barrier() is icc-specific (and barrier() is already defined using it in compiler-intel.h) and doesn't belong in compiler.h. [rdunlap@infradead.org: fix ALPHA builds when SMP is not enabled] Link: https://lkml.kernel.org/r/20201101231835.4589-1-rdunlap@infradead.org Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arvind Sankar Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Reviewed-by: Kees Cook Cc: Link: https://lkml.kernel.org/r/20201014212631.207844-1-nivedita@alum.mit.edu Signed-off-by: Linus Torvalds --- include/asm-generic/barrier.h | 1 + include/linux/compiler-clang.h | 6 ------ include/linux/compiler-gcc.h | 19 ------------------- include/linux/compiler.h | 18 ++++++++++++++++-- 4 files changed, 17 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 798027bb89be..640f09479bdf 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ +#include #include #ifndef nop diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. - */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ -- cgit From 8b21ca0218d29cc6bb7028125c7e5a10dfb4730c Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 13 Nov 2020 22:52:13 -0800 Subject: mm: memcontrol: fix missing wakeup polling thread When we poll the swap.events, we can miss being woken up when the swap event occurs. Because we didn't notify. Fixes: f3a53a3a1e5b ("mm, memcontrol: implement memory.swap.events") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Roman Gushchin Cc: Michal Hocko Cc: Yafang Shao Cc: Chris Down Cc: Tejun Heo Link: https://lkml.kernel.org/r/20201105161936.98312-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; -- cgit From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Nov 2020 17:52:58 +0100 Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed Stefan Agner reported a bug when using zsram on 32-bit Arm machines with RAM above the 4GB address boundary: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = a27bd01c [00000000] *pgd=236a0003, *pmd=1ffa64003 Internal error: Oops: 207 [#1] SMP ARM Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1 Hardware name: BCM2711 PC is at zs_map_object+0x94/0x338 LR is at zram_bvec_rw.constprop.0+0x330/0xa64 pc : [] lr : [] psr: 60000013 sp : e376bbe0 ip : 00000000 fp : c1e2921c r10: 00000002 r9 : c1dda730 r8 : 00000000 r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000 r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5383d Table: 235c2a80 DAC: fffffffd Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6) Stack: (0xe376bbe0 to 0xe376c000) As it turns out, zsram needs to know the maximum memory size, which is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture. The same problem will be hit on all 32-bit architectures that have a physical address space larger than 4GB and happen to not enable sparsemem and include asm/sparsemem.h from asm/pgtable.h. After the initial discussion, I suggested just always defining MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is set, or provoking a build error otherwise. This addresses all configurations that can currently have this runtime bug, but leaves all other configurations unchanged. I looked up the possible number of bits in source code and datasheets, here is what I found: - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never support more than 32 bits, even though supersections in theory allow up to 40 bits as well. - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5 XPA supports up to 60 bits in theory, but 40 bits are more than anyone will ever ship - On PowerPC, there are three different implementations of 36 bit addressing, but 32-bit is used without CONFIG_PTE_64BIT - On RISC-V, the normal page table format can support 34 bit addressing. There is no highmem support on RISC-V, so anything above 2GB is unused, but it might be useful to eventually support CONFIG_ZRAM for high pages. Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library") Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS") Acked-by: Thomas Bogendoerfer Reviewed-by: Stefan Agner Tested-by: Stefan Agner Acked-by: Mike Rapoport Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/ Signed-off-by: Arnd Bergmann --- include/linux/pgtable.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..e237004d498d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ +#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) +#ifdef CONFIG_PHYS_ADDR_T_64BIT +/* + * ZSMALLOC needs to know the highest PFN on 32-bit architectures + * with physical address space extension, but falls back to + * BITS_PER_LONG otherwise. + */ +#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition +#else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#endif +#endif + #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 10 Nov 2020 17:29:32 +0800 Subject: PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter In many case, we need to check return value of pm_runtime_get_sync, but it brings a trouble to the usage counter processing. Many callers forget to decrease the usage counter when it failed, which could resulted in reference leak. It has been discussed a lot[0][1]. So we add a function to deal with the usage counter for better coding. [0]https://lkml.org/lkml/2020/6/14/88 [1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139 Signed-off-by: Zhang Qilong Acked-by: Rafael J. Wysocki Signed-off-by: Jakub Kicinski --- include/linux/pm_runtime.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4b708f4e8eed..b492ae00cc90 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev) return __pm_runtime_resume(dev, RPM_GET_PUT); } +/** + * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it. + * @dev: Target device. + * + * Resume @dev synchronously and if that is successful, increment its runtime + * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been + * incremented or a negative error code otherwise. + */ +static inline int pm_runtime_resume_and_get(struct device *dev) +{ + int ret; + + ret = __pm_runtime_resume(dev, RPM_GET_PUT); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + return 0; +} + /** * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0. * @dev: Target device. -- cgit From 9d9e937b1c8be97b424e3e11938e183fcde905c0 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Wed, 11 Nov 2020 12:50:25 +0100 Subject: ipv6/netfilter: Discard first fragment not including all headers Packets are processed even though the first fragment don't include all headers through the upper layer header. This breaks TAHI IPv6 Core Conformance Test v6LC.1.3.6. Referring to RFC8200 SECTION 4.5: "If the first fragment does not include all headers through an Upper-Layer header, then that fragment should be discarded and an ICMP Parameter Problem, Code 3, message should be sent to the source of the fragment, with the Pointer field set to zero." The fragment needs to be validated the same way it is done in commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") for ipv6. Wrap the validation into a common function, ipv6_frag_thdr_truncated() to check for truncation in the upper layer header. This validation does not fullfill all aspects of RFC 8200, section 4.5, but is at the moment sufficient to pass mentioned TAHI test. In netfilter, utilize the fragment offset returned by find_prev_fhdr() to let ipv6_frag_thdr_truncated() start it's traverse from the fragment header. Return 0 to drop the fragment in the netfilter. This is the same behaviour as used on other protocol errors in this function, e.g. when nf_ct_frag6_queue() returns -EPROTO. The Fragment will later be picked up by ipv6_frag_rcv() in reassembly.c. ipv6_frag_rcv() will then send an appropriate ICMP Parameter Problem message back to the source. References commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't include all headers") Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20201111115025.28879-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c7..637cc6dd12b7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,6 +1064,8 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); +bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); + enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), -- cgit From fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 6 Nov 2020 11:33:17 -0800 Subject: scsi: libiscsi: Fix NOP race condition iSCSI NOPs are sometimes "lost", mistakenly sent to the user-land iscsid daemon instead of handled in the kernel, as they should be, resulting in a message from the daemon like: iscsid: Got nop in, but kernel supports nop handling. This can occur because of the new forward- and back-locks, and the fact that an iSCSI NOP response can occur before processing of the NOP send is complete. This can result in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), when the pointer is actually in the process of being set. To work around this, we add a new state to the "ping_task" pointer. In addition to NULL (not assigned) and a pointer (assigned), we add the state "being set", which is signaled with an INVALID pointer (using "-1"). Link: https://lore.kernel.org/r/20201106193317.16993-1-leeman.duncan@gmail.com Reviewed-by: Mike Christie Signed-off-by: Lee Duncan Signed-off-by: Martin K. Petersen --- include/scsi/libiscsi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c25fb86ffae9..b3bbd10eb3f0 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -132,6 +132,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; -- cgit From f97bb5272d9e95d400d6c8643ebb146b3e3e7842 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 09:08:41 +0100 Subject: sched: Fix data-race in wakeup Mel reported that on some ARM64 platforms loadavg goes bananas and Will tracked it down to the following race: CPU0 CPU1 schedule() prev->sched_contributes_to_load = X; deactivate_task(prev); try_to_wake_up() if (p->on_rq &&) // false if (smp_load_acquire(&p->on_cpu) && // true ttwu_queue_wakelist()) p->sched_remote_wakeup = Y; smp_store_release(prev->on_cpu, 0); where both p->sched_contributes_to_load and p->sched_remote_wakeup are in the same word, and thus the stores X and Y race (and can clobber one another's data). Whereas prior to commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the p->on_cpu handoff serialized access to p->sched_remote_wakeup (just as it still does with p->sched_contributes_to_load) that commit broke that by calling ttwu_queue_wakelist() with p->on_cpu != 0. However, due to p->XXX = X ttwu() schedule() if (p->on_rq && ...) // false smp_mb__after_spinlock() if (smp_load_acquire(&p->on_cpu) && deactivate_task() ttwu_queue_wakelist()) p->on_rq = 0; p->sched_remote_wakeup = Y; We can be sure any 'current' store is complete and 'current' is guaranteed asleep. Therefore we can move p->sched_remote_wakeup into the current flags word. Note: while the observed failure was loadavg accounting gone wrong due to ttwu() cobbering p->sched_contributes_to_load, the reverse problem is also possible where schedule() clobbers p->sched_remote_wakeup, this could result in enqueue_entity() wrecking ->vruntime and causing scheduling artifacts. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Mel Gorman Debugged-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201117083016.GK3121392@hirez.programming.kicks-ass.net --- include/linux/sched.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d383cf09e78f..0e91b451d2a2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -769,7 +769,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; #ifdef CONFIG_PSI unsigned sched_psi_wake_requeue:1; #endif @@ -779,6 +778,21 @@ struct task_struct { /* Unserialized, strictly 'current' */ + /* + * This field must not be in the scheduler word above due to wakelist + * queueing no longer being serialized by p->on_cpu. However: + * + * p->XXX = X; ttwu() + * schedule() if (p->on_rq && ..) // false + * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true + * deactivate_task() ttwu_queue_wakelist()) + * p->on_rq = 0; p->sched_remote_wakeup = Y; + * + * guarantees all stores of 'current' are visible before + * ->sched_remote_wakeup gets used, so it can be in this word. + */ + unsigned sched_remote_wakeup:1; + /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; -- cgit From 2279f540ea7d05f22d2f0c4224319330228586bc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 17 Nov 2020 07:14:32 +0100 Subject: sched/deadline: Fix priority inheritance with multiple scheduling classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Glenn reported that "an application [he developed produces] a BUG in deadline.c when a SCHED_DEADLINE task contends with CFS tasks on nested PTHREAD_PRIO_INHERIT mutexes. I believe the bug is triggered when a CFS task that was boosted by a SCHED_DEADLINE task boosts another CFS task (nested priority inheritance). ------------[ cut here ]------------ kernel BUG at kernel/sched/deadline.c:1462! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 19171 Comm: dl_boost_bug Tainted: ... Hardware name: ... RIP: 0010:enqueue_task_dl+0x335/0x910 Code: ... RSP: 0018:ffffc9000c2bbc68 EFLAGS: 00010002 RAX: 0000000000000009 RBX: ffff888c0af94c00 RCX: ffffffff81e12500 RDX: 000000000000002e RSI: ffff888c0af94c00 RDI: ffff888c10b22600 RBP: ffffc9000c2bbd08 R08: 0000000000000009 R09: 0000000000000078 R10: ffffffff81e12440 R11: ffffffff81e1236c R12: ffff888bc8932600 R13: ffff888c0af94eb8 R14: ffff888c10b22600 R15: ffff888bc8932600 FS: 00007fa58ac55700(0000) GS:ffff888c10b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa58b523230 CR3: 0000000bf44ab003 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? intel_pstate_update_util_hwp+0x13/0x170 rt_mutex_setprio+0x1cc/0x4b0 task_blocks_on_rt_mutex+0x225/0x260 rt_spin_lock_slowlock_locked+0xab/0x2d0 rt_spin_lock_slowlock+0x50/0x80 hrtimer_grab_expiry_lock+0x20/0x30 hrtimer_cancel+0x13/0x30 do_nanosleep+0xa0/0x150 hrtimer_nanosleep+0xe1/0x230 ? __hrtimer_init_sleeper+0x60/0x60 __x64_sys_nanosleep+0x8d/0xa0 do_syscall_64+0x4a/0x100 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fa58b52330d ... ---[ end trace 0000000000000002 ]— He also provided a simple reproducer creating the situation below: So the execution order of locking steps are the following (N1 and N2 are non-deadline tasks. D1 is a deadline task. M1 and M2 are mutexes that are enabled * with priority inheritance.) Time moves forward as this timeline goes down: N1 N2 D1 | | | | | | Lock(M1) | | | | | | Lock(M2) | | | | | | Lock(M2) | | | | Lock(M1) | | (!!bug triggered!) | Daniel reported a similar situation as well, by just letting ksoftirqd run with DEADLINE (and eventually block on a mutex). Problem is that boosted entities (Priority Inheritance) use static DEADLINE parameters of the top priority waiter. However, there might be cases where top waiter could be a non-DEADLINE entity that is currently boosted by a DEADLINE entity from a different lock chain (i.e., nested priority chains involving entities of non-DEADLINE classes). In this case, top waiter static DEADLINE parameters could be null (initialized to 0 at fork()) and replenish_dl_entity() would hit a BUG(). Fix this by keeping track of the original donor and using its parameters when a task is boosted. Reported-by: Glenn Elliott Reported-by: Daniel Bristot de Oliveira Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Tested-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201117061432.517340-1-juri.lelli@redhat.com --- include/linux/sched.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0e91b451d2a2..095fdec07b38 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -551,7 +551,6 @@ struct sched_dl_entity { * overruns. */ unsigned int dl_throttled : 1; - unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; @@ -570,6 +569,15 @@ struct sched_dl_entity { * time. */ struct hrtimer inactive_timer; + +#ifdef CONFIG_RT_MUTEXES + /* + * Priority Inheritance. When a DEADLINE scheduling entity is boosted + * pi_se points to the donor, otherwise points to the dl_se it belongs + * to (the original one/itself). + */ + struct sched_dl_entity *pi_se; +#endif }; #ifdef CONFIG_UCLAMP_TASK -- cgit From 138559b9f99d3b6b1d5e75c78facc067a23871c6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 15 Nov 2020 15:14:48 +0200 Subject: net/tls: Fix wrong record sn in async mode of device resync In async_resync mode, we log the TCP seq of records until the async request is completed. Later, in case one of the logged seqs matches the resync request, we return it, together with its record serial number. Before this fix, we mistakenly returned the serial number of the current record instead. Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Link: https://lore.kernel.org/r/20201115131448.2702-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index baf1e99d8193..cf1473099453 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -300,7 +300,8 @@ enum tls_offload_sync_type { #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX 13 struct tls_offload_resync_async { atomic64_t req; - u32 loglen; + u16 loglen; + u16 rcd_delta; u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX]; }; @@ -471,6 +472,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len) return (i == -1); } +static inline void tls_bigint_subtract(unsigned char *seq, int n) +{ + u64 rcd_sn; + __be64 *p; + + BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8); + + p = (__be64 *)seq; + rcd_sn = be64_to_cpu(*p); + *p = cpu_to_be64(rcd_sn - n); +} + static inline struct tls_context *tls_get_ctx(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -639,6 +652,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); rx_ctx->resync_async->loglen = 0; + rx_ctx->resync_async->rcd_delta = 0; } static inline void -- cgit From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan Tested-by: Lukasz Hawrylko Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon --- include/linux/intel-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -- cgit From 2d8f6481c17db9fa5238b277cdbc392084060b09 Mon Sep 17 00:00:00 2001 From: Georg Kohmann Date: Thu, 19 Nov 2020 10:58:33 +0100 Subject: ipv6: Remove dependency of ipv6_frag_thdr_truncated on ipv6 module IPV6=m NF_DEFRAG_IPV6=y ld: net/ipv6/netfilter/nf_conntrack_reasm.o: in function `nf_ct_frag6_gather': net/ipv6/netfilter/nf_conntrack_reasm.c:462: undefined reference to `ipv6_frag_thdr_truncated' Netfilter is depending on ipv6 symbol ipv6_frag_thdr_truncated. This dependency is forcing IPV6=y. Remove this dependency by moving ipv6_frag_thdr_truncated out of ipv6. This is the same solution as used with a similar issues: Referring to commit 70b095c843266 ("ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module") Fixes: 9d9e937b1c8b ("ipv6/netfilter: Discard first fragment not including all headers") Reported-by: Randy Dunlap Reported-by: kernel test robot Signed-off-by: Georg Kohmann Acked-by: Pablo Neira Ayuso Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20201119095833.8409-1-geokohma@cisco.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 2 -- include/net/ipv6_frag.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 637cc6dd12b7..bd1f396cc9c7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1064,8 +1064,6 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp, bool ipv6_ext_hdr(u8 nexthdr); -bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp); - enum { IP6_FH_F_FRAG = (1 << 0), IP6_FH_F_AUTH = (1 << 1), diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index a21e8b1381a1..851029ecff13 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -108,5 +108,35 @@ out_rcu_unlock: rcu_read_unlock(); inet_frag_put(&fq->q); } + +/* Check if the upper layer header is truncated in the first fragment. */ +static inline bool +ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp) +{ + u8 nexthdr = *nexthdrp; + __be16 frag_off; + int offset; + + offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off); + if (offset < 0 || (frag_off & htons(IP6_OFFSET))) + return false; + switch (nexthdr) { + case NEXTHDR_TCP: + offset += sizeof(struct tcphdr); + break; + case NEXTHDR_UDP: + offset += sizeof(struct udphdr); + break; + case NEXTHDR_ICMP: + offset += sizeof(struct icmp6hdr); + break; + default: + offset += 1; + } + if (offset > skb->len) + return true; + return false; +} + #endif #endif -- cgit From 2bf31d94423c8ae3ff58e38a115b177df6940399 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Nov 2020 11:18:08 +0100 Subject: jbd2: fix kernel-doc markups Kernel-doc markup should use this format: identifier - description They should not have any type before that, as otherwise the parser won't do the right thing. Also, some identifiers have different names between their prototypes and the kernel-doc markup. Reviewed-by: Jan Kara Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1c49fd62ff2e..578ff196b3ce 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -401,7 +401,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * struct jbd2_inode - The jbd_inode type is the structure linking inodes in * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { -- cgit From b9ad3e9f5a7a760ab068e33e1f18d240ba32ce92 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 Nov 2020 14:28:27 +0000 Subject: bonding: wait for sysfs kobject destruction before freeing struct slave syzkaller found that with CONFIG_DEBUG_KOBJECT_RELEASE=y, releasing a struct slave device could result in the following splat: kobject: 'bonding_slave' (00000000cecdd4fe): kobject_release, parent 0000000074ceb2b2 (delayed 1000) bond0 (unregistering): (slave bond_slave_1): Releasing backup interface ------------[ cut here ]------------ ODEBUG: free active (active state 0) object type: timer_list hint: workqueue_select_cpu_near kernel/workqueue.c:1549 [inline] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x98 kernel/workqueue.c:1600 WARNING: CPU: 1 PID: 842 at lib/debugobjects.c:485 debug_print_object+0x180/0x240 lib/debugobjects.c:485 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 842 Comm: kworker/u4:4 Tainted: G S 5.9.0-rc8+ #96 Hardware name: linux,dummy-virt (DT) Workqueue: netns cleanup_net Call trace: dump_backtrace+0x0/0x4d8 include/linux/bitmap.h:239 show_stack+0x34/0x48 arch/arm64/kernel/traps.c:142 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x174/0x1f8 lib/dump_stack.c:118 panic+0x360/0x7a0 kernel/panic.c:231 __warn+0x244/0x2ec kernel/panic.c:600 report_bug+0x240/0x398 lib/bug.c:198 bug_handler+0x50/0xc0 arch/arm64/kernel/traps.c:974 call_break_hook+0x160/0x1d8 arch/arm64/kernel/debug-monitors.c:322 brk_handler+0x30/0xc0 arch/arm64/kernel/debug-monitors.c:329 do_debug_exception+0x184/0x340 arch/arm64/mm/fault.c:864 el1_dbg+0x48/0xb0 arch/arm64/kernel/entry-common.c:65 el1_sync_handler+0x170/0x1c8 arch/arm64/kernel/entry-common.c:93 el1_sync+0x80/0x100 arch/arm64/kernel/entry.S:594 debug_print_object+0x180/0x240 lib/debugobjects.c:485 __debug_check_no_obj_freed lib/debugobjects.c:967 [inline] debug_check_no_obj_freed+0x200/0x430 lib/debugobjects.c:998 slab_free_hook mm/slub.c:1536 [inline] slab_free_freelist_hook+0x190/0x210 mm/slub.c:1577 slab_free mm/slub.c:3138 [inline] kfree+0x13c/0x460 mm/slub.c:4119 bond_free_slave+0x8c/0xf8 drivers/net/bonding/bond_main.c:1492 __bond_release_one+0xe0c/0xec8 drivers/net/bonding/bond_main.c:2190 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3309 [inline] bond_netdev_event+0x8f0/0xa70 drivers/net/bonding/bond_main.c:3420 notifier_call_chain+0xf0/0x200 kernel/notifier.c:83 __raw_notifier_call_chain kernel/notifier.c:361 [inline] raw_notifier_call_chain+0x44/0x58 kernel/notifier.c:368 call_netdevice_notifiers_info+0xbc/0x150 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] rollback_registered_many+0x6a4/0xec0 net/core/dev.c:9347 unregister_netdevice_many.part.0+0x2c/0x1c0 net/core/dev.c:10509 unregister_netdevice_many net/core/dev.c:10508 [inline] default_device_exit_batch+0x294/0x338 net/core/dev.c:10992 ops_exit_list.isra.0+0xec/0x150 net/core/net_namespace.c:189 cleanup_net+0x44c/0x888 net/core/net_namespace.c:603 process_one_work+0x96c/0x18c0 kernel/workqueue.c:2269 worker_thread+0x3f0/0xc30 kernel/workqueue.c:2415 kthread+0x390/0x498 kernel/kthread.c:292 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:925 This is a potential use-after-free if the sysfs nodes are being accessed whilst removing the struct slave, so wait for the object destruction to complete before freeing the struct slave itself. Fixes: 07699f9a7c8d ("bonding: add sysfs /slave dir for bond slave devices.") Fixes: a068aab42258 ("bonding: Fix reference count leak in bond_sysfs_slave_add.") Cc: Qiushi Wu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jamie Iles Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201120142827.879226-1-jamie@nuviainc.com Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/bonding.h b/include/net/bonding.h index 7d132cc1e584..d9d0ff3b0ad3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -185,6 +185,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -750,6 +755,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); -- cgit From bc2dc4406c463174613047d8b7946e12c8808cda Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 21 Nov 2020 22:17:01 -0800 Subject: compiler-clang: remove version check for BPF Tracing bpftrace parses the kernel headers and uses Clang under the hood. Remove the version check when __BPF_TRACING__ is defined (as bpftrace does) so that this tool can continue to parse kernel headers, even with older clang sources. Fixes: commit 1f7a44f63e6c ("compiler-clang: add build check for clang 10.0.1") Reported-by: Chen Yu Reported-by: Jarkko Sakkinen Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Acked-by: Song Liu Acked-by: Nathan Chancellor Acked-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20201104191052.390657-1-ndesaulniers@google.com Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index dd7233c48bf3..98cff1b4b088 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -8,8 +8,10 @@ + __clang_patchlevel__) #if CLANG_VERSION < 100001 +#ifndef __BPF_TRACING__ # error Sorry, your version of Clang is too old - please use 10.0.1 or newer. #endif +#endif /* Compiler specific definitions for Clang compiler */ -- cgit From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap Reported-by: Thomas Gleixner Reported-by: kernel test robot Reported-by: Christoph Hellwig Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Tested-by: Randy Dunlap Tested-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Cc: Joao Martins Cc: Tony Luck Cc: Fenghua Yu Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Vishal Verma Cc: Stephen Rothwell Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 14 -------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include +#include + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ -- cgit From 4349a83a3190c1d4414371161b0f4a4c3ccd3f9d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sat, 21 Nov 2020 22:17:08 -0800 Subject: mm: fix readahead_page_batch for retry entries Both btrfs and fuse have reported faults caused by seeing a retry entry instead of the page they were looking for. This was caused by a missing check in the iterator. As can be seen in the below panic log, the accessing 0x402 causes a panic. In the xarray.h, 0x402 means RETRY_ENTRY. BUG: kernel NULL pointer dereference, address: 0000000000000402 CPU: 14 PID: 306003 Comm: as Not tainted 5.9.0-1-amd64 #1 Debian 5.9.1-1 Hardware name: Lenovo ThinkSystem SR665/7D2VCTO1WW, BIOS D8E106Q-1.01 05/30/2020 RIP: 0010:fuse_readahead+0x152/0x470 [fuse] Code: 41 8b 57 18 4c 8d 54 10 ff 4c 89 d6 48 8d 7c 24 10 e8 d2 e3 28 f9 48 85 c0 0f 84 fe 00 00 00 44 89 f2 49 89 04 d4 44 8d 72 01 <48> 8b 10 41 8b 4f 1c 48 c1 ea 10 83 e2 01 80 fa 01 19 d2 81 e2 01 RSP: 0018:ffffad99ceaebc50 EFLAGS: 00010246 RAX: 0000000000000402 RBX: 0000000000000001 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff94c5af90bd98 RDI: ffffad99ceaebc60 RBP: ffff94ddc1749a00 R08: 0000000000000402 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000100 R12: ffff94de6c429ce0 R13: ffff94de6c4d3700 R14: 0000000000000001 R15: ffffad99ceaebd68 FS: 00007f228c5c7040(0000) GS:ffff94de8ed80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000402 CR3: 0000001dbd9b4000 CR4: 0000000000350ee0 Call Trace: read_pages+0x83/0x270 page_cache_readahead_unbounded+0x197/0x230 generic_file_buffered_read+0x57a/0xa20 new_sync_read+0x112/0x1a0 vfs_read+0xf8/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 042124cc64c3 ("mm: add new readahead_control API") Reported-by: David Sterba Reported-by: Wonhyuk Yang Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: Link: https://lkml.kernel.org/r/20201103142852.8543-1-willy@infradead.org Link: https://lkml.kernel.org/r/20201103124349.16722-1-vvghjk1234@gmail.com Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1e19c1f9ec9..d5570deff400 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -906,6 +906,8 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, page)) + continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; -- cgit From 01770a166165738a6e05c3d911fb4609cc4eb416 Mon Sep 17 00:00:00 2001 From: Ricardo Dias Date: Fri, 20 Nov 2020 11:11:33 +0000 Subject: tcp: fix race condition when creating child sockets from syncookies When the TCP stack is in SYN flood mode, the server child socket is created from the SYN cookie received in a TCP packet with the ACK flag set. The child socket is created when the server receives the first TCP packet with a valid SYN cookie from the client. Usually, this packet corresponds to the final step of the TCP 3-way handshake, the ACK packet. But is also possible to receive a valid SYN cookie from the first TCP data packet sent by the client, and thus create a child socket from that SYN cookie. Since a client socket is ready to send data as soon as it receives the SYN+ACK packet from the server, the client can send the ACK packet (sent by the TCP stack code), and the first data packet (sent by the userspace program) almost at the same time, and thus the server will equally receive the two TCP packets with valid SYN cookies almost at the same instant. When such event happens, the TCP stack code has a race condition that occurs between the momement a lookup is done to the established connections hashtable to check for the existence of a connection for the same client, and the moment that the child socket is added to the established connections hashtable. As a consequence, this race condition can lead to a situation where we add two child sockets to the established connections hashtable and deliver two sockets to the userspace program to the same client. This patch fixes the race condition by checking if an existing child socket exists for the same client when we are adding the second child socket to the established connections socket. If an existing child socket exists, we drop the packet and discard the second child socket to the same client. Signed-off-by: Ricardo Dias Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 92560974ea67..ca6a3ea9057e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); -bool inet_ehash_insert(struct sock *sk, struct sock *osk); -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, + bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); -- cgit From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 21 Nov 2020 08:28:17 +0200 Subject: net/packet: fix packet receive on L3 devices without visible hard header In the patchset merged by commit b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which did not have header_ops were given one for the purpose of protocol parsing on af_packet transmit path. That change made af_packet receive path regard these devices as having a visible L3 header and therefore aligned incoming skb->data to point to the skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not reset their mac_header prior to ingress and therefore their incoming packets became malformed. Ideally these devices would reset their mac headers, or af_packet would be able to rely on dev->hard_header_len being 0 for such cases, but it seems this is not the case. Fix by changing af_packet RX ll visibility criteria to include the existence of a '.create()' header operation, which is used when creating a device hard header - via dev_hard_header() - by upper layers, and does not exist in these L3 devices. As this predicate may be useful in other situations, add it as a common dev_has_header() helper in netdevice.h. Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") Signed-off-by: Eyal Birger Acked-by: Jason A. Donenfeld Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..fa275a054f46 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } +static inline bool dev_has_header(const struct net_device *dev) +{ + return dev->header_ops && dev->header_ops->create; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); -- cgit From acfdd18591eaac25446e976a0c0d190f8b3dbfb1 Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Mon, 23 Nov 2020 21:52:41 -0800 Subject: firmware: xilinx: Use hash-table for api feature check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently array of fix length PM_API_MAX is used to cache the pm_api version (valid or invalid). However ATF based PM APIs values are much higher then PM_API_MAX. So to include ATF based PM APIs also, use hash-table to store the pm_api version status. Signed-off-by: Amit Sunil Dhamne Reported-by: Arnd Bergmann  Signed-off-by: Ravi Patel Signed-off-by: Rajan Vaja Reviewed-by: Arnd Bergmann Tested-by: Michal Simek Fixes: f3217d6f2f7a ("firmware: xilinx: fix out-of-bounds access") Cc: stable Link: https://lore.kernel.org/r/1606197161-25976-1-git-send-email-rajan.vaja@xilinx.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 5968df82b991..41a1bab98b7e 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -50,10 +50,6 @@ #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U #define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U -/* Feature check status */ -#define PM_FEATURE_INVALID -1 -#define PM_FEATURE_UNCHECKED 0 - /* * Firmware FPGA Manager flags * XILINX_ZYNQMP_PM_FPGA_FULL: FPGA full reconfiguration -- cgit From 5204bb683c1633e550c2124ccc2358dd645a80db Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 23 Nov 2020 07:36:25 +0200 Subject: devlink: Fix reload stats structure Fix reload stats structure exposed to the user. Change stats structure hierarchy to have the reload action as a parent of the stat entry and then stat entry includes value per limit. This will also help to avoid string concatenation on iproute2 output. Reload stats structure before this fix: "stats": { "reload": { "driver_reinit": 2, "fw_activate": 1, "fw_activate_no_reset": 0 } } After this fix: "stats": { "reload": { "driver_reinit": { "unspecified": 2 }, "fw_activate": { "unspecified": 1, "no_reset": 0 } } Fixes: a254c264267e ("devlink: Add reload stats") Signed-off-by: Moshe Shemesh Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/1606109785-25197-1-git-send-email-moshe@mellanox.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0113bc4db9f5..5203f54a2be1 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -526,6 +526,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */ DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_INFO, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ /* add new attributes above here, update the policy in devlink.c */ -- cgit From fdeb17c70c9ecae655378761accf5a26a55a33cf Mon Sep 17 00:00:00 2001 From: Hui Su Date: Wed, 25 Nov 2020 00:52:05 +0800 Subject: trace: fix potenial dangerous pointer The bdi_dev_name() returns a char [64], and the __entry->name is a char [32]. It maybe dangerous to TP_printk("%s", __entry->name) after the strncpy(). CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201124165205.GA23937@rlk Acked-by: Steven Rostedt (VMware) Acked-by: Tejun Heo Signed-off-by: Hui Su Signed-off-by: Jan Kara --- include/trace/events/writeback.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index e7cbccc7c14c..57d795365987 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -190,7 +190,7 @@ TRACE_EVENT(inode_foreign_history, ), TP_fast_assign( - strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); + strscpy_pad(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; @@ -219,7 +219,7 @@ TRACE_EVENT(inode_switch_wbs, ), TP_fast_assign( - strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32); + strscpy_pad(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); @@ -252,7 +252,7 @@ TRACE_EVENT(track_foreign_dirty, struct address_space *mapping = page_mapping(page); struct inode *inode = mapping ? mapping->host : NULL; - strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; @@ -285,7 +285,7 @@ TRACE_EVENT(flush_foreign, ), TP_fast_assign( - strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; -- cgit From 025cc2fb6a4e84e9a0552c0017dcd1c24b7ac7da Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 25 Nov 2020 14:18:10 -0800 Subject: net/tls: Protect from calling tls_dev_del for TLS RX twice tls_device_offload_cleanup_rx doesn't clear tls_ctx->netdev after calling tls_dev_del if TLX TX offload is also enabled. Clearing tls_ctx->netdev gets postponed until tls_device_gc_task. It leaves a time frame when tls_device_down may get called and call tls_dev_del for RX one extra time, confusing the driver, which may lead to a crash. This patch corrects this racy behavior by adding a flag to prevent tls_device_down from calling tls_dev_del the second time. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20201125221810.69870-1-saeedm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index cf1473099453..2bdd802212fe 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -199,6 +199,12 @@ enum tls_context_flags { * to be atomic. */ TLS_TX_SYNC_SCHED = 1, + /* tls_dev_del was called for the RX side, device state was released, + * but tls_ctx->netdev might still be kept, because TX-side driver + * resources might not be released yet. Used to prevent the second + * tls_dev_del call in tls_device_down if it happens simultaneously. + */ + TLS_RX_DEV_CLOSED = 2, }; struct cipher_context { -- cgit From 4df910620bebb5cfe234af16ac8f6474b60215fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 25 Nov 2020 13:22:21 +0800 Subject: mm: memcg: relayout structure mem_cgroup to avoid cache interference 0day reported one -22.7% regression for will-it-scale page_fault2 case [1] on a 4 sockets 144 CPU platform, and bisected to it to be caused by Waiman's optimization (commit bd0b230fe1) of saving one 'struct page_counter' space for 'struct mem_cgroup'. Initially we thought it was due to the cache alignment change introduced by the patch, but further debug shows that it is due to some hot data members ('vmstats_local', 'vmstats_percpu', 'vmstats') sit in 2 adjacent cacheline (2N and 2N+1 cacheline), and when adjacent cache line prefetch is enabled, it triggers an "extended level" of cache false sharing for 2 adjacent cache lines. So exchange the 2 member blocks, while keeping mostly the original cache alignment, which can restore and even enhance the performance, and save 64 bytes of space for 'struct mem_cgroup' (from 2880 to 2816, with 0day's default RHEL-8.3 kernel config) [1]. https://lore.kernel.org/lkml/20201102091543.GM31092@shao2-debian/ Fixes: bd0b230fe145 ("mm/memcg: unify swap and memsw page counters") Reported-by: kernel test robot Signed-off-by: Feng Tang Acked-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a80c59af2c60..922a7f600465 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -282,20 +282,6 @@ struct mem_cgroup { MEMCG_PADDING(_pad1_); - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - - /* Legacy local VM stats and events */ - struct memcg_vmstats_percpu __percpu *vmstats_local; - - /* Subtree VM stats and events (batched updates) */ - struct memcg_vmstats_percpu __percpu *vmstats_percpu; - - MEMCG_PADDING(_pad2_); - atomic_long_t vmstats[MEMCG_NR_STAT]; atomic_long_t vmevents[NR_VM_EVENT_ITEMS]; @@ -317,6 +303,20 @@ struct mem_cgroup { struct list_head objcg_list; /* list of inherited objcgs */ #endif + MEMCG_PADDING(_pad2_); + + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + struct task_struct *move_lock_task; + + /* Legacy local VM stats and events */ + struct memcg_vmstats_percpu __percpu *vmstats_local; + + /* Subtree VM stats and events (batched updates) */ + struct memcg_vmstats_percpu __percpu *vmstats_percpu; + #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; struct wb_domain cgwb_domain; -- cgit From 69929d4c49e182f8526d42c43b37b460d562d3a0 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 24 Nov 2020 07:34:44 -0500 Subject: net: openvswitch: fix TTL decrement action netlink message format Currently, the openvswitch module is not accepting the correctly formated netlink message for the TTL decrement action. For both setting and getting the dec_ttl action, the actions should be nested in the OVS_DEC_TTL_ATTR_ACTION attribute as mentioned in the openvswitch.h uapi. When the original patch was sent, it was tested with a private OVS userspace implementation. This implementation was unfortunately not upstreamed and reviewed, hence an erroneous version of this patch was sent out. Leaving the patch as-is would cause problems as the kernel module could interpret additional attributes as actions and vice-versa, due to the actions not being encapsulated/nested within the actual attribute, but being concatinated after it. Fixes: 744676e77720 ("openvswitch: add TTL decrement action") Signed-off-by: Eelco Chaudron Link: https://lore.kernel.org/r/160622121495.27296.888010441924340582.stgit@wsfd-netdev64.ntdv.lab.eng.bos.redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8300cc29dec8..8d16744edc31 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -1058,4 +1058,6 @@ enum ovs_dec_ttl_attr { __OVS_DEC_TTL_ATTR_MAX }; +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1) + #endif /* _LINUX_OPENVSWITCH_H */ -- cgit