From 901a891fa6235b68d4c007800ea91c1a091f6f99 Mon Sep 17 00:00:00 2001 From: Stefan Lengfeld Date: Wed, 1 Nov 2017 21:06:41 +0100 Subject: i2c: use macro IS_ENABLED in header i2c.h Using the macro IS_ENABLED to check the option CONFIG_I2C=(y|m) makes the code nicer. No functional change. Signed-off-by: Stefan Lengfeld Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0f774406fad0..5857236919cf 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -55,7 +55,7 @@ typedef int (*i2c_slave_cb_t)(struct i2c_client *, enum i2c_slave_event, u8 *); struct module; struct property_entry; -#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) +#if IS_ENABLED(CONFIG_I2C) /* * The master routines are the ones normally used to transmit data to devices * on a bus (or read from them). Apart from two basic transfer functions to @@ -354,7 +354,7 @@ struct i2c_board_info { .type = dev_type, .addr = (dev_addr) -#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) +#if IS_ENABLED(CONFIG_I2C) /* Add-on boards should register/unregister their devices; e.g. a board * with integrated I2C, a config eeprom, sensors, and a codec that's * used in conjunction with the primary hardware. @@ -706,7 +706,7 @@ i2c_unlock_adapter(struct i2c_adapter *adapter) /* administration... */ -#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) +#if IS_ENABLED(CONFIG_I2C) extern int i2c_add_adapter(struct i2c_adapter *); extern void i2c_del_adapter(struct i2c_adapter *); extern int i2c_add_numbered_adapter(struct i2c_adapter *); -- cgit From 3991c5c80beaf7eb9bce61e0b2f8f449e351a38e Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 2 Nov 2017 10:40:24 +0800 Subject: i2c: Switch to using gpiod interface for gpio bus recovery Currently the i2c gpio recovery code uses gpio integer interface instead of the gpiod. 
This change switches the core code to use the gpiod while still retaining compatibility with the gpio integer interface. This will allow individual drivers to be updated and tested individually to switch to using the gpiod interface. Reviewed-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Phil Reid Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 5857236919cf..bf62c4a97a09 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -499,6 +499,8 @@ struct i2c_timings { * may configure padmux here for SDA/SCL line or something else they want. * @scl_gpio: gpio number of the SCL line. Only required for GPIO recovery. * @sda_gpio: gpio number of the SDA line. Only required for GPIO recovery. + * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. + * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. */ struct i2c_bus_recovery_info { int (*recover_bus)(struct i2c_adapter *); @@ -513,6 +515,8 @@ struct i2c_bus_recovery_info { /* gpio recovery */ int scl_gpio; int sda_gpio; + struct gpio_desc *scl_gpiod; + struct gpio_desc *sda_gpiod; }; int i2c_recover_bus(struct i2c_adapter *adap); -- cgit From e1eb7d28c0753ec3e5ff9dce7880c243ffdfd4b3 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 2 Nov 2017 10:40:30 +0800 Subject: i2c: remove legacy integer scl/sda gpio for recovery Remove all references to code related to using integer based ids for scl/sda gpio for bus recovery. All in-tree drivers are now using the gpio descriptors to specify the required gpios. 
Reviewed-by: Andy Shevchenko Signed-off-by: Phil Reid Reviewed-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bf62c4a97a09..a556db976fc6 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -485,7 +485,7 @@ struct i2c_timings { /** * struct i2c_bus_recovery_info - I2C bus recovery information * @recover_bus: Recover routine. Either pass driver's recover_bus() routine, or - * i2c_generic_scl_recovery() or i2c_generic_gpio_recovery(). + * i2c_generic_scl_recovery(). * @get_scl: This gets current value of SCL line. Mandatory for generic SCL * recovery. Used internally for generic GPIO recovery. * @set_scl: This sets/clears SCL line. Mandatory for generic SCL recovery. Used @@ -497,8 +497,6 @@ struct i2c_timings { * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. Platform * may configure padmux here for SDA/SCL line or something else they want. - * @scl_gpio: gpio number of the SCL line. Only required for GPIO recovery. - * @sda_gpio: gpio number of the SDA line. Only required for GPIO recovery. * @scl_gpiod: gpiod of the SCL line. Only required for GPIO recovery. * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. 
*/ @@ -513,8 +511,6 @@ struct i2c_bus_recovery_info { void (*unprepare_recovery)(struct i2c_adapter *); /* gpio recovery */ - int scl_gpio; - int sda_gpio; struct gpio_desc *scl_gpiod; struct gpio_desc *sda_gpiod; }; @@ -522,7 +518,6 @@ struct i2c_bus_recovery_info { int i2c_recover_bus(struct i2c_adapter *adap); /* Generic recovery routines */ -int i2c_generic_gpio_recovery(struct i2c_adapter *adap); int i2c_generic_scl_recovery(struct i2c_adapter *adap); /** -- cgit From 876d1b53c5bfecc6a4d0fc7ea8bf5f288535322f Mon Sep 17 00:00:00 2001 From: Cole Robinson Date: Sat, 25 Nov 2017 19:41:32 -0500 Subject: rtc: Remove unused RTC_DEVICE_NAME_SIZE The last usage was removed in 5c82a6ae0 when rtc_device.name was removed Signed-off-by: Cole Robinson Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 41319a2e409b..fc6c90b57be0 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -87,7 +87,6 @@ struct rtc_class_ops { int (*set_offset)(struct device *, long offset); }; -#define RTC_DEVICE_NAME_SIZE 20 typedef struct rtc_task { void (*func)(void *private_data); void *private_data; -- cgit From e94bc5d18be03dac8e9d73d30c5523728edeff76 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 4 Nov 2017 21:20:02 +0100 Subject: i2c: add helpers to ease DMA handling One helper checks if DMA is suitable and optionally creates a bounce buffer, if not. The other function returns the bounce buffer and makes sure the data is properly copied back to the message. 
Reviewed-by: Jonathan Cameron Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a556db976fc6..9d9d379b5a15 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -768,6 +768,9 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); } +u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); +void i2c_release_dma_safe_msg_buf(struct i2c_msg *msg, u8 *buf); + int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); /** * module_i2c_driver() - Helper macro for registering a modular I2C driver -- cgit From 8a91732b3b33454d8034e7be5c8342f028ea772e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 4 Nov 2017 21:20:04 +0100 Subject: i2c: refactor i2c_master_{send_recv} Those two functions are very similar, the only differences are that one needs the I2C_M_RD flag for its message while the other one needs the buffer casted to drop the const. Introduce a generic helper which allows to specify the flags (also needed later for DMA safe variants of these calls) and let the casting be done in the inlining functions which are now calling the new helper function. Signed-off-by: Wolfram Sang Reviewed-by: Jonathan Cameron Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9d9d379b5a15..5ac0f9055715 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -63,10 +63,36 @@ struct property_entry; * transmit an arbitrary number of messages without interruption. * @count must be be less than 64k since msg.len is u16. 
*/ -extern int i2c_master_send(const struct i2c_client *client, const char *buf, - int count); -extern int i2c_master_recv(const struct i2c_client *client, char *buf, - int count); +extern int i2c_transfer_buffer_flags(const struct i2c_client *client, + char *buf, int count, u16 flags); + +/** + * i2c_master_recv - issue a single I2C message in master receive mode + * @client: Handle to slave device + * @buf: Where to store data read from slave + * @count: How many bytes to read, must be less than 64k since msg.len is u16 + * + * Returns negative errno, or else the number of bytes read. + */ +static inline int i2c_master_recv(const struct i2c_client *client, + char *buf, int count) +{ + return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD); +}; + +/** + * i2c_master_send - issue a single I2C message in master transmit mode + * @client: Handle to slave device + * @buf: Data that will be written to the slave + * @count: How many bytes to write, must be less than 64k since msg.len is u16 + * + * Returns negative errno, or else the number of bytes written. + */ +static inline int i2c_master_send(const struct i2c_client *client, + const char *buf, int count) +{ + return i2c_transfer_buffer_flags(client, (char *)buf, count, 0); +}; /* Transfer num messages. */ -- cgit From ba98645c7d54640f163096cda3609d4d55c6ae54 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 4 Nov 2017 21:20:05 +0100 Subject: i2c: add i2c_master_{send|recv}_dmasafe Use the new helper to create variants of i2c_master_{send|recv} which mark their buffers as DMA safe. 
Signed-off-by: Wolfram Sang Acked-by: Jonathan Cameron Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 5ac0f9055715..5d7f3c1853ae 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -80,6 +80,22 @@ static inline int i2c_master_recv(const struct i2c_client *client, return i2c_transfer_buffer_flags(client, buf, count, I2C_M_RD); }; +/** + * i2c_master_recv_dmasafe - issue a single I2C message in master receive mode + * using a DMA safe buffer + * @client: Handle to slave device + * @buf: Where to store data read from slave, must be safe to use with DMA + * @count: How many bytes to read, must be less than 64k since msg.len is u16 + * + * Returns negative errno, or else the number of bytes read. + */ +static inline int i2c_master_recv_dmasafe(const struct i2c_client *client, + char *buf, int count) +{ + return i2c_transfer_buffer_flags(client, buf, count, + I2C_M_RD | I2C_M_DMA_SAFE); +}; + /** * i2c_master_send - issue a single I2C message in master transmit mode * @client: Handle to slave device @@ -94,6 +110,22 @@ static inline int i2c_master_send(const struct i2c_client *client, return i2c_transfer_buffer_flags(client, (char *)buf, count, 0); }; +/** + * i2c_master_send_dmasafe - issue a single I2C message in master transmit mode + * using a DMA safe buffer + * @client: Handle to slave device + * @buf: Data that will be written to the slave, must be safe to use with DMA + * @count: How many bytes to write, must be less than 64k since msg.len is u16 + * + * Returns negative errno, or else the number of bytes written. + */ +static inline int i2c_master_send_dmasafe(const struct i2c_client *client, + const char *buf, int count) +{ + return i2c_transfer_buffer_flags(client, (char *)buf, count, + I2C_M_DMA_SAFE); +}; + /* Transfer num messages. 
*/ extern int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, -- cgit From 608c0d8804ef3ca4cda8ec6ad914e47deb283d7b Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Thu, 9 Nov 2017 08:00:35 -0600 Subject: PCI/IOV: Add pci_vf_drivers_autoprobe() interface Add a pci_vf_drivers_autoprobe() interface. Setting autoprobe to false on the PF prevents drivers from binding to VFs when they are enabled. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J. Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0403894147a3..e3e94467687a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1961,6 +1961,7 @@ int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { @@ -1988,6 +1989,7 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) { return 0; } static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -- cgit From 55de88778f4bfe6333db4e475afb15ef413b4874 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 1 Dec 2017 01:10:11 +0100 Subject: ARM: 8726/1: B15: Add CPU hotplug awareness The Broadcom Brahma-B15 readahead cache needs to be disabled, respectively re-enable during a CPU hotplug. 
If we did not do so, CPU hotplug would occasionally fail with random crashes when a given CPU exits the coherency domain while the RAC is still enabled, as it would get stale data from the RAC. In order to avoid adding any specific B15 readahead-cache awareness to arch/arm/mach-bcm/hotplug-brcmstb.c we use a CPU hotplug state machine which allows us to catch CPU hotplug events and disable/flush enable the RAC accordingly. Signed-off-by: Alamy Liu Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- include/linux/cpuhotplug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..3f7b30fcc96a 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -59,6 +59,7 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, + CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, @@ -137,6 +138,7 @@ enum cpuhp_state { CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_X86_TBOOT_DYING, + CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, -- cgit From 3eff6d2cc995e5a4cffb7a35dc368f02f39c4cd3 Mon Sep 17 00:00:00 2001 From: Stephen Barber Date: Fri, 10 Nov 2017 22:55:52 +0100 Subject: mfd: cros_ec: Introduce RTC commands and events definitions. The EC can function as a simple RTC; this patch adds the RTC-related definitions needed by the rtc-cros-ec driver. 
Signed-off-by: Stephen Barber Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Acked-by: Benson Leung Reviewed-by: Brian Norris Signed-off-by: Alexandre Belloni --- include/linux/mfd/cros_ec_commands.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 2b16e95b9bb8..c6046a2e94d5 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -291,6 +291,9 @@ enum host_event_code { /* EC desires to change state of host-controlled USB mux */ EC_HOST_EVENT_USB_MUX = 28, + /* EC RTC event occurred */ + EC_HOST_EVENT_RTC = 26, + /* * The high bit of the event mask is not used as a host event code. If * it reads back as set, then the entire event mask should be @@ -799,6 +802,8 @@ enum ec_feature_code { EC_FEATURE_USB_MUX = 23, /* Motion Sensor code has an internal software FIFO */ EC_FEATURE_MOTION_SENSE_FIFO = 24, + /* EC has RTC feature that can be controlled by host commands */ + EC_FEATURE_RTC = 27, }; #define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32)) @@ -1709,6 +1714,9 @@ struct ec_response_rtc { #define EC_CMD_RTC_SET_VALUE 0x46 #define EC_CMD_RTC_SET_ALARM 0x47 +/* Pass as param to SET_ALARM to clear the current alarm */ +#define EC_RTC_ALARM_CLEAR 0 + /*****************************************************************************/ /* Port80 log access */ -- cgit From e53537653791b4aaa9ec88a9eb5fd187d44ff6ac Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Dec 2017 13:17:49 +0100 Subject: i2c/ARM: davinci: Deep refactoring of I2C recovery Alter the DaVinci GPIO recovery fetch to use descriptors all the way down into the board files. 
Signed-off-by: Linus Walleij Acked-by: Sekhar Nori Tested-by: Sekhar Nori Acked-by: Arnd Bergmann Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-davinci.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-davinci.h b/include/linux/platform_data/i2c-davinci.h index 89fd34727a24..98967df07468 100644 --- a/include/linux/platform_data/i2c-davinci.h +++ b/include/linux/platform_data/i2c-davinci.h @@ -16,9 +16,8 @@ struct davinci_i2c_platform_data { unsigned int bus_freq; /* standard bus frequency (kHz) */ unsigned int bus_delay; /* post-transaction delay (usec) */ - unsigned int sda_pin; /* GPIO pin ID to use for SDA */ - unsigned int scl_pin; /* GPIO pin ID to use for SCL */ - bool has_pfunc; /*chip has a ICPFUNC register */ + bool gpio_recovery; /* Use GPIO recovery method */ + bool has_pfunc; /* Chip has a ICPFUNC register */ }; /* for board setup code */ -- cgit From e32213fbc5432c28268dced0dc8735dcf8532d36 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Fri, 8 Dec 2017 11:28:30 -0500 Subject: eeprom: at24: support eeproms that do not auto-rollover reads Some multi-address eeproms in the at24 family may not automatically roll-over reads to the next slave address. On those eeproms, reads that straddle slave boundaries will not work correctly. Solution: Mark such eeproms with a flag that prevents reads straddling slave boundaries. Add the AT24_FLAG_NO_RDROL flag to the eeprom entry in the device_id table, or add 'no-read-rollover' to the eeprom devicetree entry. Note that I have not personally encountered an at24 chip that does not support read rollovers. They may or may not exist. However, my hardware requires this functionality because of a quirk. 
Signed-off-by: Sven Van Asbroeck Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/at24.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h index 271a4e25af67..841bb2815a41 100644 --- a/include/linux/platform_data/at24.h +++ b/include/linux/platform_data/at24.h @@ -50,6 +50,8 @@ struct at24_platform_data { #define AT24_FLAG_TAKE8ADDR BIT(4) /* take always 8 addresses (24c00) */ #define AT24_FLAG_SERIAL BIT(3) /* factory-programmed serial number */ #define AT24_FLAG_MAC BIT(2) /* factory-programmed mac address */ +#define AT24_FLAG_NO_RDROL BIT(1) /* does not auto-rollover reads to */ + /* the next slave address */ void (*setup)(struct nvmem_device *nvmem, void *context); void *context; -- cgit From 98fb3a34736dec1e14e43382c0df30f815560e5f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 Dec 2017 17:53:09 +0100 Subject: eeprom: at24: fix a whitespace error in platform data Replace spaces with tabs in the definition of AT24_FLAG_NO_RDROL. 
Fixes: 9d404411091c ("eeprom: at24: support eeproms that do not auto-rollover reads") Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/at24.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h index 841bb2815a41..63507ff464ee 100644 --- a/include/linux/platform_data/at24.h +++ b/include/linux/platform_data/at24.h @@ -50,7 +50,7 @@ struct at24_platform_data { #define AT24_FLAG_TAKE8ADDR BIT(4) /* take always 8 addresses (24c00) */ #define AT24_FLAG_SERIAL BIT(3) /* factory-programmed serial number */ #define AT24_FLAG_MAC BIT(2) /* factory-programmed mac address */ -#define AT24_FLAG_NO_RDROL BIT(1) /* does not auto-rollover reads to */ +#define AT24_FLAG_NO_RDROL BIT(1) /* does not auto-rollover reads to */ /* the next slave address */ void (*setup)(struct nvmem_device *nvmem, void *context); -- cgit From c0b24625979284dd212423320fe1c84fe244ed7f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 7 Jan 2018 16:38:43 -0500 Subject: dax: pass detailed error code from dax_iomap_fault() Ext4 needs to pass through error from its iomap handler to the page fault handler so that it can properly detect ENOSPC and force transaction commit and retry the fault (and block allocation). Add argument to dax_iomap_fault() for passing such error. 
Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 5258346c558c..0185ecdae135 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -96,7 +96,7 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - pfn_t *pfnp, const struct iomap_ops *ops); + pfn_t *pfnp, int *errp, const struct iomap_ops *ops); int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -- cgit From f69120ce6c024aa634a8fc25787205e42f0ccbe6 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Wed, 10 Jan 2018 00:27:29 -0500 Subject: jbd2: fix sphinx kernel-doc build warnings Sphinx emits various (26) warnings when building make target 'htmldocs'. Currently struct definitions contain duplicate documentation, some as kernel-docs and some as standard c89 comments. We can reduce duplication while cleaning up the kernel docs. Move all kernel-docs to right above each struct member. Use the set of all existing comments (kernel-doc and c89). Add documentation for missing struct members and function arguments. Signed-off-by: Tobin C. 
Harding Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/jbd2.h | 431 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 268 insertions(+), 163 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 296d1e0ea87b..b708e5169d1d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -418,26 +418,41 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) /** - * struct jbd_inode is the structure linking inodes in ordered mode - * present in a transaction so that we can sync them during commit. + * struct jbd_inode - The jbd_inode type is the structure linking inodes in + * ordered mode present in a transaction so that we can sync them during commit. */ struct jbd2_inode { - /* Which transaction does this inode belong to? Either the running - * transaction or the committing one. [j_list_lock] */ + /** + * @i_transaction: + * + * Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] + */ transaction_t *i_transaction; - /* Pointer to the running transaction modifying inode's data in case - * there is already a committing transaction touching it. [j_list_lock] */ + /** + * @i_next_transaction: + * + * Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. 
[j_list_lock] + */ transaction_t *i_next_transaction; - /* List of inodes in the i_transaction [j_list_lock] */ + /** + * @i_list: List of inodes in the i_transaction [j_list_lock] + */ struct list_head i_list; - /* VFS inode this inode belongs to [constant during the lifetime - * of the structure] */ + /** + * @i_vfs_inode: + * + * VFS inode this inode belongs to [constant for lifetime of structure] + */ struct inode *i_vfs_inode; - /* Flags of inode [j_list_lock] */ + /** + * @i_flags: Flags of inode [j_list_lock] + */ unsigned long i_flags; }; @@ -447,12 +462,20 @@ struct jbd2_revoke_table_s; * struct handle_s - The handle_s type is the concrete type associated with * handle_t. * @h_transaction: Which compound transaction is this update a part of? + * @h_journal: Which journal handle belongs to - used iff h_reserved set. + * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle + * @h_ref: Reference count on this handle. + * @h_err: Field for caller's use to track errors through large fs operations. + * @h_sync: Flag for sync-on-close. + * @h_jdata: Flag to force data journaling. + * @h_reserved: Flag for handle for reserved credits. + * @h_aborted: Flag indicating fatal error on handle. + * @h_type: For handle statistics. + * @h_line_no: For handle statistics. + * @h_start_jiffies: Handle Start time. + * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @saved_alloc_context: Saved context while transaction is open. 
**/ /* Docbook can't yet cope with the bit fields, but will leave the documentation @@ -462,32 +485,23 @@ struct jbd2_revoke_table_s; struct jbd2_journal_handle { union { - /* Which compound transaction is this update a part of? */ transaction_t *h_transaction; /* Which journal handle belongs to - used iff h_reserved set */ journal_t *h_journal; }; - /* Handle reserved for finishing the logical operation */ handle_t *h_rsv_handle; - - /* Number of remaining buffers we are allowed to dirty: */ int h_buffer_credits; - - /* Reference count on this handle */ int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ int h_err; /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_reserved: 1; /* handle with reserved credits */ - unsigned int h_aborted: 1; /* fatal error on handle */ - unsigned int h_type: 8; /* for handle statistics */ - unsigned int h_line_no: 16; /* for handle statistics */ + unsigned int h_sync: 1; + unsigned int h_jdata: 1; + unsigned int h_reserved: 1; + unsigned int h_aborted: 1; + unsigned int h_type: 8; + unsigned int h_line_no: 16; unsigned long h_start_jiffies; unsigned int h_requested_credits; @@ -729,228 +743,253 @@ jbd2_time_diff(unsigned long start, unsigned long end) /** * struct journal_s - The journal_s type is the concrete type associated with * journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself - * @j_running_transaction: The current running transaction.. 
- * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_reserved_credits: Number of buffers reserved from the running transaction - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). 
- * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_history_lock: Protect the transactions statistics history - * @j_proc_entry: procfs entry for the jbd statistics directory - * @j_stats: Overall statistics - * @j_private: An opaque pointer to fs-private information. - * @j_trans_commit_map: Lockdep entity to track transaction commit dependencies */ - struct journal_s { - /* General journaling state flags [j_state_lock] */ + /** + * @j_flags: General journaling state flags [j_state_lock] + */ unsigned long j_flags; - /* + /** + * @j_errno: + * * Is there an outstanding uncleared error on the journal (from a prior * abort)? [j_state_lock] */ int j_errno; - /* The superblock buffer */ + /** + * @j_sb_buffer: The first part of the superblock buffer. + */ struct buffer_head *j_sb_buffer; + + /** + * @j_superblock: The second part of the superblock buffer. 
+ */ journal_superblock_t *j_superblock; - /* Version of the superblock format */ + /** + * @j_format_version: Version of the superblock format. + */ int j_format_version; - /* - * Protect the various scalars in the journal + /** + * @j_state_lock: Protect the various scalars in the journal. */ rwlock_t j_state_lock; - /* + /** + * @j_barrier_count: + * * Number of processes waiting to create a barrier lock [j_state_lock] */ int j_barrier_count; - /* The barrier lock itself */ + /** + * @j_barrier: The barrier lock itself. + */ struct mutex j_barrier; - /* + /** + * @j_running_transaction: + * * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] */ transaction_t *j_running_transaction; - /* + /** + * @j_committing_transaction: + * * the transaction we are pushing to disk * [j_state_lock] [caller holding open handle] */ transaction_t *j_committing_transaction; - /* + /** + * @j_checkpoint_transactions: + * * ... and a linked circular list of all transactions waiting for * checkpointing. [j_list_lock] */ transaction_t *j_checkpoint_transactions; - /* + /** + * @j_wait_transaction_locked: + * * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released + * or for a barrier lock to be released. */ wait_queue_head_t j_wait_transaction_locked; - /* Wait queue for waiting for commit to complete */ + /** + * @j_wait_done_commit: Wait queue for waiting for commit to complete. + */ wait_queue_head_t j_wait_done_commit; - /* Wait queue to trigger commit */ + /** + * @j_wait_commit: Wait queue to trigger commit. + */ wait_queue_head_t j_wait_commit; - /* Wait queue to wait for updates to complete */ + /** + * @j_wait_updates: Wait queue to wait for updates to complete. + */ wait_queue_head_t j_wait_updates; - /* Wait queue to wait for reserved buffer credits to drop */ + /** + * @j_wait_reserved: + * + * Wait queue to wait for reserved buffer credits to drop. 
+ */ wait_queue_head_t j_wait_reserved; - /* Semaphore for locking against concurrent checkpoints */ + /** + * @j_checkpoint_mutex: + * + * Semaphore for locking against concurrent checkpoints. + */ struct mutex j_checkpoint_mutex; - /* + /** + * @j_chkpt_bhs: + * * List of buffer heads used by the checkpoint routine. This * was moved from jbd2_log_do_checkpoint() to reduce stack * usage. Access to this array is controlled by the - * j_checkpoint_mutex. [j_checkpoint_mutex] + * @j_checkpoint_mutex. [j_checkpoint_mutex] */ struct buffer_head *j_chkpt_bhs[JBD2_NR_BATCH]; - - /* + + /** + * @j_head: + * * Journal head: identifies the first unused block in the journal. * [j_state_lock] */ unsigned long j_head; - /* + /** + * @j_tail: + * * Journal tail: identifies the oldest still-used block in the journal. * [j_state_lock] */ unsigned long j_tail; - /* + /** + * @j_free: + * * Journal free: how many free blocks are there in the journal? * [j_state_lock] */ unsigned long j_free; - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] + /** + * @j_first: + * + * The block number of the first usable block in the journal + * [j_state_lock]. */ unsigned long j_first; + + /** + * @j_last: + * + * The block number one beyond the last usable block in the journal + * [j_state_lock]. + */ unsigned long j_last; - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. + /** + * @j_dev: Device where we store the journal. */ struct block_device *j_dev; + + /** + * @j_blocksize: Block size for the location where we store the journal. + */ int j_blocksize; + + /** + * @j_blk_offset: + * + * Starting block offset into the device where we store the journal. + */ unsigned long long j_blk_offset; + + /** + * @j_devname: Journal device name. + */ char j_devname[BDEVNAME_SIZE+24]; - /* + /** + * @j_fs_dev: + * * Device which holds the client fs. 
For internal journal this will be * equal to j_dev. */ struct block_device *j_fs_dev; - /* Total maximum capacity of the journal region on disk. */ + /** + * @j_maxlen: Total maximum capacity of the journal region on disk. + */ unsigned int j_maxlen; - /* Number of buffers reserved from the running transaction */ + /** + * @j_reserved_credits: + * + * Number of buffers reserved from the running transaction. + */ atomic_t j_reserved_credits; - /* - * Protects the buffer lists and internal buffer state. + /** + * @j_list_lock: Protects the buffer lists and internal buffer state. */ spinlock_t j_list_lock; - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ + /** + * @j_inode: + * + * Optional inode where we store the journal. If present, all + * journal block numbers are mapped into this inode via bmap(). + */ struct inode *j_inode; - /* + /** + * @j_tail_sequence: + * * Sequence number of the oldest transaction in the log [j_state_lock] */ tid_t j_tail_sequence; - /* + /** + * @j_transaction_sequence: + * * Sequence number of the next transaction to grant [j_state_lock] */ tid_t j_transaction_sequence; - /* + /** + * @j_commit_sequence: + * * Sequence number of the most recently committed transaction * [j_state_lock]. */ tid_t j_commit_sequence; - /* + /** + * @j_commit_request: + * * Sequence number of the most recent transaction wanting commit * [j_state_lock] */ tid_t j_commit_request; - /* + /** + * @j_uuid: + * * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -958,85 +997,151 @@ struct journal_s */ __u8 j_uuid[16]; - /* Pointer to the current commit thread for this journal */ + /** + * @j_task: Pointer to the current commit thread for this journal. 
+ */ struct task_struct *j_task; - /* + /** + * @j_max_transaction_buffers: + * * Maximum number of metadata buffers to allow in a single compound - * commit transaction + * commit transaction. */ int j_max_transaction_buffers; - /* + /** + * @j_commit_interval: + * * What is the maximum transaction lifetime before we begin a commit? */ unsigned long j_commit_interval; - /* The timer used to wakeup the commit thread: */ + /** + * @j_commit_timer: The timer used to wakeup the commit thread. + */ struct timer_list j_commit_timer; - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] + /** + * @j_revoke_lock: Protect the revoke table. */ spinlock_t j_revoke_lock; + + /** + * @j_revoke: + * + * The revoke table - maintains the list of revoked blocks in the + * current transaction. + */ struct jbd2_revoke_table_s *j_revoke; + + /** + * @j_revoke_table: Alternate revoke tables for j_revoke. + */ struct jbd2_revoke_table_s *j_revoke_table[2]; - /* - * array of bhs for jbd2_journal_commit_transaction + /** + * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. */ struct buffer_head **j_wbuf; + + /** + * @j_wbufsize: + * + * Size of @j_wbuf array. + */ int j_wbufsize; - /* - * this is the pid of hte last person to run a synchronous operation - * through the journal + /** + * @j_last_sync_writer: + * + * The pid of the last person to run a synchronous operation + * through the journal. */ pid_t j_last_sync_writer; - /* - * the average amount of time in nanoseconds it takes to commit a + /** + * @j_average_commit_time: + * + * The average amount of time in nanoseconds it takes to commit a * transaction to disk. 
[j_state_lock] */ u64 j_average_commit_time; - /* - * minimum and maximum times that we should wait for - * additional filesystem operations to get batched into a - * synchronous handle in microseconds + /** + * @j_min_batch_time: + * + * Minimum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. */ u32 j_min_batch_time; + + /** + * @j_max_batch_time: + * + * Maximum time that we should wait for additional filesystem operations + * to get batched into a synchronous handle in microseconds. + */ u32 j_max_batch_time; - /* This function is called when a transaction is closed */ + /** + * @j_commit_callback: + * + * This function is called when a transaction is closed. + */ void (*j_commit_callback)(journal_t *, transaction_t *); /* * Journal statistics */ + + /** + * @j_history_lock: Protect the transactions statistics history. + */ spinlock_t j_history_lock; + + /** + * @j_proc_entry: procfs entry for the jbd statistics directory. + */ struct proc_dir_entry *j_proc_entry; + + /** + * @j_stats: Overall statistics. + */ struct transaction_stats_s j_stats; - /* Failed journal commit ID */ + /** + * @j_failed_commit: Failed journal commit ID. + */ unsigned int j_failed_commit; - /* + /** + * @j_private: + * * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here + * superblock pointer here. */ void *j_private; - /* Reference to checksum algorithm driver via cryptoapi */ + /** + * @j_chksum_driver: + * + * Reference to checksum algorithm driver via cryptoapi. + */ struct crypto_shash *j_chksum_driver; - /* Precomputed journal UUID checksum for seeding other checksums */ + /** + * @j_csum_seed: + * + * Precomputed journal UUID checksum for seeding other checksums. + */ __u32 j_csum_seed; #ifdef CONFIG_DEBUG_LOCK_ALLOC - /* + /** + * @j_trans_commit_map: + * * Lockdep entity to track transaction commit dependencies. 
Handles * hold this "lock" for read, when we wait for commit, we acquire the * "lock" for writing. This matches the properties of jbd2 journalling -- cgit From 3d463f28564618805713658c6aeb786fa23f420b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:52 -0800 Subject: fscrypt: move fscrypt_has_encryption_key() to supp/notsupp headers fscrypt_has_encryption_key() is already split into two versions depending on whether the filesystem is being built with encryption support or not. Move them into the appropriate headers. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 10 ---------- include/linux/fscrypt_notsupp.h | 5 +++++ include/linux/fscrypt_supp.h | 5 +++++ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 08b4b40c5aa8..d1c891b5bd9c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -129,11 +129,6 @@ static inline struct page *fscrypt_control_page(struct page *page) return ((struct fscrypt_ctx *)page_private(page))->w.control_page; } -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return (inode->i_crypt_info != NULL); -} - #include #else /* !__FS_HAS_ENCRYPTION */ @@ -144,11 +139,6 @@ static inline struct page *fscrypt_control_page(struct page *page) return ERR_PTR(-EINVAL); } -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return 0; -} - #include #endif /* __FS_HAS_ENCRYPTION */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 63e58808519a..52e330285457 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -14,6 +14,11 @@ #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct 
inode *inode, gfp_t gfp_flags) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index cf9e9fc02f0a..79bb8beae018 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -11,6 +11,11 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); -- cgit From 4fd4b15ccbc79d512ad7982fc1a7ecd34703398f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:53 -0800 Subject: fscrypt: move fscrypt_control_page() to supp/notsupp headers fscrypt_control_page() is already split into two versions depending on whether the filesystem is being built with encryption support or not. Move them into the appropriate headers. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 18 ++---------------- include/linux/fscrypt_notsupp.h | 5 +++++ include/linux/fscrypt_supp.h | 6 ++++++ 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d1c891b5bd9c..c23b2f16129a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -123,24 +123,10 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) } #if __FS_HAS_ENCRYPTION - -static inline struct page *fscrypt_control_page(struct page *page) -{ - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -} - #include - -#else /* !__FS_HAS_ENCRYPTION */ - -static inline struct page *fscrypt_control_page(struct page *page) -{ - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -} - +#else #include -#endif /* __FS_HAS_ENCRYPTION */ +#endif /** * fscrypt_require_key - require an inode's encryption key diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h 
index 52e330285457..812dc701a5b3 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -48,6 +48,11 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} static inline void fscrypt_restore_control_page(struct page *page) { diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 79bb8beae018..ad40780d4653 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -25,6 +25,12 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, u64, gfp_t); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + extern void fscrypt_restore_control_page(struct page *); extern const struct dentry_operations fscrypt_d_ops; -- cgit From e4de782a0960b9620c084881f12fb5e73ebff82b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:54 -0800 Subject: fscrypt: move fscrypt_info_cachep declaration to fscrypt_private.h The fscrypt_info kmem_cache is internal to fscrypt; filesystems don't need to access it. So move its declaration into fscrypt_private.h. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_supp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index ad40780d4653..33d641e27c18 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -17,7 +17,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) } /* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, -- cgit From 542060c02cdb5c0740fd7156651463e321a859a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:55 -0800 Subject: fscrypt: move fscrypt_ctx declaration to fscrypt_supp.h Filesystems only ever access 'struct fscrypt_ctx' through fscrypt functions. But when a filesystem is built without encryption support, these functions are all stubbed out, so the declaration of fscrypt_ctx is unneeded. Therefore, move it from fscrypt.h to fscrypt_supp.h. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 16 +--------------- include/linux/fscrypt_supp.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c23b2f16129a..0f94d087a6d1 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -24,23 +24,9 @@ #define FS_CRYPTO_BLOCK_SIZE 16 +struct fscrypt_ctx; struct fscrypt_info; -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 33d641e27c18..fd6ee089ced0 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -11,6 +11,21 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + static inline bool fscrypt_has_encryption_key(const struct inode *inode) { return (inode->i_crypt_info != NULL); -- cgit From 1493651b53b4811960b6220a340929074b58a55b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:56 -0800 Subject: fscrypt: split fscrypt_dummy_context_enabled() into supp/notsupp versions fscrypt_dummy_context_enabled() accesses ->s_cop, which now is only set when the filesystem is built with encryption support. This didn't actually matter because no filesystems called it. 
However, it will start being used soon, so fix it by moving it from fscrypt.h to fscrypt_supp.h and stubbing it out in fscrypt_notsupp.h. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 8 -------- include/linux/fscrypt_notsupp.h | 5 +++++ include/linux/fscrypt_supp.h | 6 ++++++ 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 0f94d087a6d1..b671a4eef47f 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -75,14 +75,6 @@ struct fscrypt_operations { /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - static inline bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) { diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 812dc701a5b3..81e02201b215 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -19,6 +19,11 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} + /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index fd6ee089ced0..e7dfa2974906 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -31,6 +31,12 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return (inode->i_crypt_info != NULL); } +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} + 
/* crypto.c */ extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -- cgit From bdd234764769a267794f275ce96706a466d376d7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:57 -0800 Subject: fscrypt: move fscrypt_operations declaration to fscrypt_supp.h Filesystems now only define their fscrypt_operations when they are compiled with encryption support, so move the fscrypt_operations declaration from fscrypt.h to fscrypt_supp.h. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 18 ------------------ include/linux/fscrypt_supp.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b671a4eef47f..33b95a91f720 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -54,24 +54,6 @@ struct fscrypt_name { #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - /* Maximum value for the third parameter of fscrypt_operations.set_context(). 
*/ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index e7dfa2974906..ce61caf26f40 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -11,6 +11,24 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + struct fscrypt_ctx { union { struct { -- cgit From bb8179e5a8509876415c0eac6f6ba8a130b3cb47 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:58 -0800 Subject: fscrypt: move fscrypt_valid_enc_modes() to fscrypt_private.h The encryption modes are validated by fs/crypto/, not by individual filesystems. Therefore, move fscrypt_valid_enc_modes() from fscrypt.h to fscrypt_private.h. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 33b95a91f720..2e4dce0365cf 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -57,20 +57,6 @@ struct fscrypt_name { /* Maximum value for the third parameter of fscrypt_operations.set_context(). 
*/ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') -- cgit From dcf0db9e5df369461c9d55282abbf66d263ef2db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:44:59 -0800 Subject: fscrypt: move fscrypt_is_dot_dotdot() to fs/crypto/fname.c Only fs/crypto/fname.c cares about treating the "." and ".." filenames specially with regards to encryption, so move fscrypt_is_dot_dotdot() from fscrypt.h to there. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 2e4dce0365cf..3045fc49d3ca 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -57,17 +57,6 @@ struct fscrypt_name { /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - #if __FS_HAS_ENCRYPTION #include #else -- cgit From a575784c6c13b8f1bae05fbba873e326ec73e289 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:45:00 -0800 Subject: fscrypt: trim down fscrypt.h includes fscrypt.h included way too many other headers, given that it is included by filesystems both with and without encryption support. 
Trim down the includes list by moving the needed includes into more appropriate places, and removing the unneeded ones. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 6 ------ include/linux/fscrypt_supp.h | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 3045fc49d3ca..071ebabfc287 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -14,13 +14,7 @@ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H -#include #include -#include -#include -#include -#include -#include #define FS_CRYPTO_BLOCK_SIZE 16 diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index ce61caf26f40..562a9bc04560 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -11,6 +11,9 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +#include +#include + /* * fscrypt superblock flags */ -- cgit From 76e81d6d50481144824237e6843122824b0a55c0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:45:01 -0800 Subject: fscrypt: new helper functions for ->symlink() Currently, filesystems supporting fscrypt need to implement some tricky logic when creating encrypted symlinks, including handling a peculiar on-disk format (struct fscrypt_symlink_data) and correctly calculating the size of the encrypted symlink. Introduce helper functions to make things a bit easier: - fscrypt_prepare_symlink() computes and validates the size the symlink target will require on-disk. - fscrypt_encrypt_symlink() creates the encrypted target if needed. The new helpers actually fix some subtle bugs. First, when checking whether the symlink target was too long, filesystems didn't account for the fact that the NUL padding is meant to be truncated if it would cause the maximum length to be exceeded, as is done for filenames in directories. 
Consequently users would receive ENAMETOOLONG when creating symlinks close to what is supposed to be the maximum length. For example, with EXT4 with a 4K block size, the maximum symlink target length in an encrypted directory is supposed to be 4093 bytes (in comparison to 4095 in an unencrypted directory), but in FS_POLICY_FLAGS_PAD_32-mode only up to 4064 bytes were accepted. Second, symlink targets of "." and ".." were not being encrypted, even though they should be, as these names are special in *directory entries* but not in symlink targets. Fortunately, we can fix this simply by starting to encrypt them, as old kernels already accept them in encrypted form. Third, the output string length the filesystems were providing when doing the actual encryption was incorrect, as it was forgotten to exclude 'sizeof(struct fscrypt_symlink_data)'. Fortunately though, this bug didn't make a difference. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 64 +++++++++++++++++++++++++++++++++++++++++ include/linux/fscrypt_notsupp.h | 16 +++++++++++ include/linux/fscrypt_supp.h | 6 ++++ 3 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 071ebabfc287..6a678d0e956a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -196,4 +196,68 @@ static inline int fscrypt_prepare_setattr(struct dentry *dentry, return 0; } +/** + * fscrypt_prepare_symlink - prepare to create a possibly-encrypted symlink + * @dir: directory in which the symlink is being created + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @max_len: space the filesystem has available to store the symlink target + * @disk_link: (out) the on-disk symlink target being prepared + * + * This function computes the size the symlink target will require on-disk, + * stores it in @disk_link->len, and validates it against @max_len. 
An + * encrypted symlink may be longer than the original. + * + * Additionally, @disk_link->name is set to @target if the symlink will be + * unencrypted, but left NULL if the symlink will be encrypted. For encrypted + * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the + * on-disk target later. (The reason for the two-step process is that some + * filesystems need to know the size of the symlink target before creating the + * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) + * + * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, + * -ENOKEY if the encryption key is missing, or another -errno code if a problem + * occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_symlink(struct inode *dir, + const char *target, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(dir) || fscrypt_dummy_context_enabled(dir)) + return __fscrypt_prepare_symlink(dir, len, max_len, disk_link); + + disk_link->name = (unsigned char *)target; + disk_link->len = len + 1; + if (disk_link->len > max_len) + return -ENAMETOOLONG; + return 0; +} + +/** + * fscrypt_encrypt_symlink - encrypt the symlink target if needed + * @inode: symlink inode + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @disk_link: (in/out) the on-disk symlink target being prepared + * + * If the symlink target needs to be encrypted, then this function encrypts it + * into @disk_link->name. fscrypt_prepare_symlink() must have been called + * previously to compute @disk_link->len. If the filesystem did not allocate a + * buffer for @disk_link->name after calling fscrypt_prepare_symlink(), then one + * will be kmalloc()'ed and the filesystem will be responsible for freeing it.
+ * + * Return: 0 on success, -errno on failure + */ +static inline int fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(inode)) + return __fscrypt_encrypt_symlink(inode, target, len, disk_link); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 81e02201b215..02ec0aa894d8 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -223,4 +223,20 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 562a9bc04560..7e0b67ccd816 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -205,5 +205,11 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *new_dentry, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit From 3b0d8837a79ba7b7cc324d1f2b206c074e9c6182 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 5 Jan 2018 10:45:02 -0800 Subject: fscrypt: new helper function - fscrypt_get_symlink() Filesystems also have duplicate code to support ->get_link() on encrypted symlinks. 
Factor it out into a new function fscrypt_get_symlink(). It takes in the contents of the encrypted symlink on-disk and provides the target (decrypted or encoded) that should be returned from ->get_link(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 8 ++++++++ include/linux/fscrypt_supp.h | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 02ec0aa894d8..dd106640c6ea 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -239,4 +239,12 @@ static inline int __fscrypt_encrypt_symlink(struct inode *inode, return -EOPNOTSUPP; } +static inline const char *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size, + struct delayed_call *done) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 7e0b67ccd816..dc2babf3f7d3 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -211,5 +211,8 @@ extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link); +extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size, + struct delayed_call *done); #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit From 1e80ad712f69a34d1e3c556d1d674d04b266ad40 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:30:08 -0500 Subject: fscrypt: remove fscrypt_fname_usr_to_disk() fscrypt_fname_usr_to_disk() sounded very generic but was actually only used to encrypt symlinks. Remove it now that all filesystems have been switched over to fscrypt_encrypt_symlink(). 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 7 ------- include/linux/fscrypt_supp.h | 2 -- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index dd106640c6ea..0962f504aa91 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -159,13 +159,6 @@ static inline int fscrypt_fname_disk_to_usr(struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index dc2babf3f7d3..e00191deb0d6 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -113,8 +113,6 @@ extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, extern void fscrypt_fname_free_buffer(struct fscrypt_str *); extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 -- cgit From 0eaab5b10621e84868df911dad43d330fa1b9bc8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:30:08 -0500 Subject: fscrypt: move fscrypt_symlink_data to fscrypt_private.h Now that all filesystems have been converted to use the symlink helper functions, they no longer need the declaration of 'struct fscrypt_symlink_data'. Move it from fscrypt.h to fscrypt_private.h. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 6a678d0e956a..952ab97af325 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -21,15 +21,6 @@ struct fscrypt_ctx; struct fscrypt_info; -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - struct fscrypt_str { unsigned char *name; u32 len; -- cgit From 2cbadadcfdf0d8a538ce32ed12e18ef487773b07 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:30:08 -0500 Subject: fscrypt: define fscrypt_fname_alloc_buffer() to be for presented names Previously fscrypt_fname_alloc_buffer() was used to allocate buffers for both presented (decrypted or encoded) and encrypted filenames. That was confusing, because it had to allocate the worst-case size for either, e.g. including NUL-padding even when it was meaningless. But now that fscrypt_setup_filename() no longer calls it, it is only used in the ->get_link() and ->readdir() paths, which specifically want a buffer for presented filenames. Therefore, switch the behavior over to allocating the buffer for presented filenames only. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 0962f504aa91..c9592e307df5 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -140,7 +140,7 @@ static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, } static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, + u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; -- cgit From b9db0b4a68d373897119d1e0a3beebe1c1a23936 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:30:08 -0500 Subject: fscrypt: fix up fscrypt_fname_encrypted_size() for internal use Filesystems don't need fscrypt_fname_encrypted_size() anymore, so unexport it and move it to fscrypt_private.h. We also never calculate the encrypted size of a filename without having the fscrypt_info present since it is needed to know the amount of NUL-padding which is determined by the encryption policy, and also we will always truncate the NUL-padding to the maximum filename length. Therefore, also make fscrypt_fname_encrypted_size() assume that the fscrypt_info is present, and make it truncate the returned length to the specified max_len. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 8 -------- include/linux/fscrypt_supp.h | 1 - 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index c9592e307df5..342eb97e0476 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -131,14 +131,6 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) return; } -static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, - u32 ilen) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, struct fscrypt_str *crypto_str) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index e00191deb0d6..2dd5767c77b0 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -107,7 +107,6 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) kfree(fname->crypto_buf.name); } -extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -- cgit From 3d204e24d452f96704f5feb83f6b7654245defc9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 11 Jan 2018 23:30:13 -0500 Subject: fscrypt: remove 'ci' parameter from fscrypt_put_encryption_info() fscrypt_put_encryption_info() is only called when evicting an inode, so the 'struct fscrypt_info *ci' parameter is always NULL, and there cannot be races with other threads. This was cruft left over from the broken key revocation code. Remove the unused parameter and the cmpxchg(). Also remove the #ifdefs around the fscrypt_put_encryption_info() calls, since fscrypt_notsupp.h defines a no-op stub for it. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 3 +-- include/linux/fscrypt_supp.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 342eb97e0476..44b50c04bae9 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -105,8 +105,7 @@ static inline int fscrypt_get_encryption_info(struct inode *inode) return -EOPNOTSUPP; } -static inline void fscrypt_put_encryption_info(struct inode *inode, - struct fscrypt_info *ci) +static inline void fscrypt_put_encryption_info(struct inode *inode) { return; } diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 2dd5767c77b0..477a7a6504d2 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -96,7 +96,7 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *, void *, bool); /* keyinfo.c */ extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); +extern void fscrypt_put_encryption_info(struct inode *); /* fname.c */ extern int fscrypt_setup_filename(struct inode *, const struct qstr *, -- cgit From b394d468e7d75637e682a9be4a1181b27186c593 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 14:22:38 -0800 Subject: usercopy: Enhance and rename report_usercopy() In preparation for refactoring the usercopy checks to pass offset to the hardened usercopy report, this renames report_usercopy() to the more accurate usercopy_abort(), marks it as noreturn because it is, adds a hopefully helpful comment for anyone investigating such reports, makes the function available to the slab allocators, and adds new "detail" and "offset" arguments. 
Signed-off-by: Kees Cook --- include/linux/uaccess.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 251e655d407f..38b6442dc569 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -273,4 +273,10 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) #endif +#ifdef CONFIG_HARDENED_USERCOPY +void __noreturn usercopy_abort(const char *name, const char *detail, + bool to_user, unsigned long offset, + unsigned long len); +#endif + #endif /* __LINUX_UACCESS_H__ */ -- cgit From f4e6e289cb9cf67885b6b18b9d56d2c3e1c714a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 14:48:22 -0800 Subject: usercopy: Include offset in hardened usercopy report This refactors the hardened usercopy code so that failure reporting can happen within the checking functions instead of at the top level. This simplifies the return value handling and allows more details and offsets to be included in the report. Having the offset can be much more helpful in understanding hardened usercopy bugs. 
Signed-off-by: Kees Cook --- include/linux/slab.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 50697a1d6621..2dbeccdcb76b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -167,15 +167,11 @@ void kzfree(const void *); size_t ksize(const void *); #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page); +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user); #else -static inline const char *__check_heap_object(const void *ptr, - unsigned long n, - struct page *page) -{ - return NULL; -} +static inline void __check_heap_object(const void *ptr, unsigned long n, + struct page *page, bool to_user) { } #endif /* -- cgit From 4229a470175be14e1d2648713be8a5e8e8fbea02 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 12:53:20 -0800 Subject: stddef.h: Introduce sizeof_field() The size of fields within a structure is needed in a few places in the kernel already, and will be needed for the usercopy whitelisting when declaring whitelist regions within structures. This creates a dedicated macro and redefines offsetofend() to use it. 
Existing usage, ignoring the 1200+ lustre assert uses: $ git grep -E 'sizeof\(\(\((struct )?[a-zA-Z_]+ \*\)0\)->' | \ grep -v staging/lustre | wc -l 65 Signed-off-by: Kees Cook --- include/linux/stddef.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 2181719fd907..998a4ba28eba 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -19,6 +19,14 @@ enum { #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) #endif +/** + * sizeof_field(TYPE, MEMBER) + * + * @TYPE: The structure containing the field of interest + * @MEMBER: The field to return the size of + */ +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + /** * offsetofend(TYPE, MEMBER) * @@ -26,6 +34,6 @@ enum { * @MEMBER: The member within the structure to get the end offset of */ #define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif -- cgit From 8eb8284b412906181357c2b0110d879d5af95e52 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Sat, 10 Jun 2017 22:50:28 -0400 Subject: usercopy: Prepare for usercopy whitelisting This patch prepares the slab allocator to handle caches having annotations (useroffset and usersize) defining usercopy regions. This patch is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Currently, hardened usercopy performs dynamic bounds checking on slab cache objects. This is good, but still leaves a lot of kernel memory available to be copied to/from userspace in the face of bugs. 
To further restrict what memory is available for copying, this creates a way to whitelist specific areas of a given slab cache object for copying to/from userspace, allowing much finer granularity of access control. Slab caches that are never exposed to userspace can declare no whitelist for their objects, thereby keeping them unavailable to userspace via dynamic copy operations. (Note, an implicit form of whitelisting is the use of constant sizes in usercopy operations and get_user()/put_user(); these bypass hardened usercopy checks since these sizes cannot change at runtime.) To support this whitelist annotation, usercopy region offset and size members are added to struct kmem_cache. The slab allocator receives a new function, kmem_cache_create_usercopy(), that creates a new cache with a usercopy region defined, suitable for declaring spans of fields within the objects that get copied to/from userspace. In this patch, the default kmem_cache_create() marks the entire allocation as whitelisted, leaving it semantically unchanged. Once all fine-grained whitelists have been added (in subsequent patches), this will be changed to a usersize of 0, making caches created with kmem_cache_create() not copyable to/from userspace. 
After the entire usercopy whitelist series is applied, less than 15% of the slab cache memory remains exposed to potential usercopy bugs after a fresh boot: Total Slab Memory: 48074720 Usercopyable Memory: 6367532 13.2% task_struct 0.2% 4480/1630720 RAW 0.3% 300/96000 RAWv6 2.1% 1408/64768 ext4_inode_cache 3.0% 269760/8740224 dentry 11.1% 585984/5273856 mm_struct 29.1% 54912/188448 kmalloc-8 100.0% 24576/24576 kmalloc-16 100.0% 28672/28672 kmalloc-32 100.0% 81920/81920 kmalloc-192 100.0% 96768/96768 kmalloc-128 100.0% 143360/143360 names_cache 100.0% 163840/163840 kmalloc-64 100.0% 167936/167936 kmalloc-256 100.0% 339968/339968 kmalloc-512 100.0% 350720/350720 kmalloc-96 100.0% 455616/455616 kmalloc-8192 100.0% 655360/655360 kmalloc-1024 100.0% 812032/812032 kmalloc-4096 100.0% 819200/819200 kmalloc-2048 100.0% 1310720/1310720 After some kernel build workloads, the percentage (mainly driven by dentry and inode caches expanding) drops under 10%: Total Slab Memory: 95516184 Usercopyable Memory: 8497452 8.8% task_struct 0.2% 4000/1456000 RAW 0.3% 300/96000 RAWv6 2.1% 1408/64768 ext4_inode_cache 3.0% 1217280/39439872 dentry 11.1% 1623200/14608800 mm_struct 29.1% 73216/251264 kmalloc-8 100.0% 24576/24576 kmalloc-16 100.0% 28672/28672 kmalloc-32 100.0% 94208/94208 kmalloc-192 100.0% 96768/96768 kmalloc-128 100.0% 143360/143360 names_cache 100.0% 163840/163840 kmalloc-64 100.0% 245760/245760 kmalloc-256 100.0% 339968/339968 kmalloc-512 100.0% 350720/350720 kmalloc-96 100.0% 563520/563520 kmalloc-8192 100.0% 655360/655360 kmalloc-1024 100.0% 794624/794624 kmalloc-4096 100.0% 819200/819200 kmalloc-2048 100.0% 1257472/1257472 Signed-off-by: David Windsor [kees: adjust commit log, split out a few extra kmalloc hunks] [kees: add field names to function declarations] [kees: convert BUGs to WARNs and fail closed] [kees: add attack surface reduction analysis to commit log] Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: 
linux-xfs@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Christoph Lameter --- include/linux/slab.h | 27 +++++++++++++++++++++------ include/linux/slab_def.h | 3 +++ include/linux/slub_def.h | 3 +++ 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 2dbeccdcb76b..8bf14d9762ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,9 +135,13 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); -struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, - slab_flags_t, - void (*)(void *)); +struct kmem_cache *kmem_cache_create(const char *name, size_t size, + size_t align, slab_flags_t flags, + void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create_usercopy(const char *name, + size_t size, size_t align, slab_flags_t flags, + size_t useroffset, size_t usersize, + void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -153,9 +157,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ - sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + kmem_cache_create(#__struct, sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), NULL) + +/* + * To whitelist a single field for copying to/from usercopy, use this + * macro instead for KMEM_CACHE() above. 
+ */ +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + kmem_cache_create_usercopy(#__struct, \ + sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), \ + offsetof(struct __struct, __field), \ + sizeof_field(struct __struct, __field), NULL) /* * Common kmalloc functions provided by all allocators diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 072e46e9e1d5..7385547c04b1 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -85,6 +85,9 @@ struct kmem_cache { unsigned int *random_seq; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 0adae162dc8f..8ad99c47b19c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -135,6 +135,9 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; -- cgit From afcc90f8621e289cd082ba97900e76f01afe778c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 10 Jan 2018 15:17:01 -0800 Subject: usercopy: WARN() on slab cache usercopy region violations This patch adds checking of usercopy cache whitelisting, and is modified from Brad Spengler/PaX Team's PAX_USERCOPY whitelisting code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. The SLAB and SLUB allocators are modified to WARN() on all copy operations in which the kernel heap memory being modified falls outside of the cache's defined usercopy region. Based on an earlier patch from David Windsor. 
Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Laura Abbott Cc: Ingo Molnar Cc: Mark Rutland Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/uaccess.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 38b6442dc569..efe79c1cdd47 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -274,6 +274,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #endif #ifdef CONFIG_HARDENED_USERCOPY +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len); void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, unsigned long len); -- cgit From 2d891fbc3bb681ba1f826e7ee70dbe38ca7465fe Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 30 Nov 2017 13:04:32 -0800 Subject: usercopy: Allow strict enforcement of whitelists This introduces CONFIG_HARDENED_USERCOPY_FALLBACK to control the behavior of hardened usercopy whitelist violations. By default, whitelist violations will continue to WARN() so that any bad or missing usercopy whitelists can be discovered without being too disruptive. If this config is disabled at build time or a system is booted with "slab_common.usercopy_fallback=0", usercopy whitelists will BUG() instead of WARN(). This is useful for admins that want to use usercopy whitelists immediately. 
Suggested-by: Matthew Garrett Signed-off-by: Kees Cook --- include/linux/slab.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 8bf14d9762ec..231abc8976c5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,6 +135,8 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); +extern bool usercopy_fallback; + struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align, slab_flags_t flags, void (*ctor)(void *)); -- cgit From 5905429ad85657c28d93ec3d826ddeea1f44c3ce Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Aug 2017 13:00:58 -0700 Subject: fork: Provide usercopy whitelisting for task_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the blocked and saved_sigmask fields of task_struct are copied to userspace (via sigmask_to_save() and setup_rt_frame()), it is always copied with a static length (i.e. sizeof(sigset_t)). The only portion of task_struct that is potentially dynamically sized and may be copied to userspace is in the architecture-specific thread_struct at the end of task_struct. cache object allocation: kernel/fork.c: alloc_task_struct_node(...): return kmem_cache_alloc_node(task_struct_cachep, ...); dup_task_struct(...): ... tsk = alloc_task_struct_node(node); copy_process(...): ... dup_task_struct(...) _do_fork(...): ... copy_process(...) example usage trace: arch/x86/kernel/fpu/signal.c: __fpu__restore_sig(...): ... struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; ... __copy_from_user(&fpu->state.xsave, ..., state_size); fpu__restore_sig(...): ... return __fpu__restore_sig(...); arch/x86/kernel/signal.c: restore_sigcontext(...): ... fpu__restore_sig(...) This introduces arch_thread_struct_whitelist() to let an architecture declare specifically where the whitelist should be within thread_struct. 
If undefined, the entire thread_struct field is left whitelisted. Cc: Andrew Morton Cc: Nicholas Piggin Cc: Laura Abbott Cc: "Mickaël Salaün" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andy Lutomirski Signed-off-by: Kees Cook Acked-by: Rik van Riel --- include/linux/sched/task.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 05b8650f06f5..5be31eb7b266 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -104,6 +104,20 @@ extern int arch_task_struct_size __read_mostly; # define arch_task_struct_size (sizeof(struct task_struct)) #endif +#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST +/* + * If an architecture has not declared a thread_struct whitelist we + * must assume something there may need to be copied to userspace. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = 0; + /* Handle dynamically sized thread_struct. */ + *size = arch_task_struct_size - offsetof(struct task_struct, thread); +} +#endif + #ifdef CONFIG_VMAP_STACK static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) { -- cgit From 766a4f27f328979c10efd7272b05261166296435 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 9 Jan 2018 14:58:54 +0100 Subject: i2c: make kerneldoc about bus recovery more precise "Used internally" is vague. What it actually means is that those fields are populated by the core if valid GPIOs are provided. Change the comments to reflect that. Tested-by: Phil Reid Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 5d7f3c1853ae..f8a9d81e911e 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -545,12 +545,12 @@ struct i2c_timings { * @recover_bus: Recover routine. 
Either pass driver's recover_bus() routine, or * i2c_generic_scl_recovery(). * @get_scl: This gets current value of SCL line. Mandatory for generic SCL - * recovery. Used internally for generic GPIO recovery. - * @set_scl: This sets/clears SCL line. Mandatory for generic SCL recovery. Used - * internally for generic GPIO recovery. + * recovery. Populated internally for generic GPIO recovery. + * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery. + * Populated internally for generic GPIO recovery. * @get_sda: This gets current value of SDA line. Optional for generic SCL - * recovery. Used internally, if sda_gpio is a valid GPIO, for generic GPIO - * recovery. + * recovery. Populated internally, if sda_gpio is a valid GPIO, for generic + * GPIO recovery. * @prepare_recovery: This will be called before starting recovery. Platform may * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. Platform -- cgit From 6c92204e446694306198c7c394f3692bde46b696 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 9 Jan 2018 14:58:55 +0100 Subject: i2c: add identifier in declarations for i2c_bus_recovery No reason to have them undefined, so let's add them. Tested-by: Phil Reid Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f8a9d81e911e..fd87b806b5f1 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -559,14 +559,14 @@ struct i2c_timings { * @sda_gpiod: gpiod of the SDA line. Only required for GPIO recovery. 
*/ struct i2c_bus_recovery_info { - int (*recover_bus)(struct i2c_adapter *); + int (*recover_bus)(struct i2c_adapter *adap); - int (*get_scl)(struct i2c_adapter *); - void (*set_scl)(struct i2c_adapter *, int val); - int (*get_sda)(struct i2c_adapter *); + int (*get_scl)(struct i2c_adapter *adap); + void (*set_scl)(struct i2c_adapter *adap, int val); + int (*get_sda)(struct i2c_adapter *adap); - void (*prepare_recovery)(struct i2c_adapter *); - void (*unprepare_recovery)(struct i2c_adapter *); + void (*prepare_recovery)(struct i2c_adapter *adap); + void (*unprepare_recovery)(struct i2c_adapter *adap); /* gpio recovery */ struct gpio_desc *scl_gpiod; -- cgit From 8092178ffe67dbd1f987e2e308e871c774774a16 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 9 Jan 2018 14:58:56 +0100 Subject: i2c: add 'set_sda' to bus_recovery_info This will be needed when we want to create STOP conditions, too, later. Create the needed fields and populate them for the GPIO case if the GPIO is set to output. Tested-by: Phil Reid Signed-off-by: Wolfram Sang Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fd87b806b5f1..419a38e7c315 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -551,6 +551,9 @@ struct i2c_timings { * @get_sda: This gets current value of SDA line. Optional for generic SCL * recovery. Populated internally, if sda_gpio is a valid GPIO, for generic * GPIO recovery. + * @set_sda: This sets/clears the SDA line. Optional for generic SCL recovery. + * Populated internally, if sda_gpio is a valid GPIO, for generic GPIO + * recovery. * @prepare_recovery: This will be called before starting recovery. Platform may * configure padmux here for SDA/SCL line or something else they want. * @unprepare_recovery: This will be called after completing recovery. 
Platform @@ -564,6 +567,7 @@ struct i2c_bus_recovery_info { int (*get_scl)(struct i2c_adapter *adap); void (*set_scl)(struct i2c_adapter *adap, int val); int (*get_sda)(struct i2c_adapter *adap); + void (*set_sda)(struct i2c_adapter *adap, int val); void (*prepare_recovery)(struct i2c_adapter *adap); void (*unprepare_recovery)(struct i2c_adapter *adap); -- cgit From d3452f1d88311c9af16d709d51dba5ad44afbd1d Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Mon, 22 Jan 2018 13:31:11 +0100 Subject: pinctrl: Forward declare struct device pinctrl/devinfo.h is using forward declaration from pinctrl/consumer.h for configurations with CONFIG_PINCTRL defined, however nothing declares it in the opposite case. Fix this by adding a forward declaration. Signed-off-by: Ladislav Michl Signed-off-by: Linus Walleij --- include/linux/pinctrl/devinfo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index 05082e407c4a..d01a8638bb45 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -43,6 +43,8 @@ extern int pinctrl_init_done(struct device *dev); #else +struct device; + /* Stubs if we're not using pinctrl */ static inline int pinctrl_bind_pins(struct device *dev) -- cgit From 856e1eb9bdd4bd703907925be112519ff65d991f Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 5 Jan 2018 10:45:47 -0600 Subject: PCI/AER: Add uevents in AER and EEH error/resume Devices can go offline when errors are reported. This patch adds a change to the kernel object and lets udev know of the error. When the device resumes, a change is also set reporting the device as online. Therefore, EEH and AER events are better propagated to user space for PCI devices in all arches. Signed-off-by: Bryant G. Ly Signed-off-by: Juan J.
Alvarez Acked-by: Bjorn Helgaas Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- include/linux/pci.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e3e94467687a..405630441b74 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2277,6 +2277,42 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) return false; } +/** + * pci_uevent_ers - emit a uevent during recovery path of pci device + * @pdev: pci device to check + * @err_type: type of error event + * + */ +static inline void pci_uevent_ers(struct pci_dev *pdev, + enum pci_ers_result err_type) +{ + int idx = 0; + char *envp[3]; + + switch (err_type) { + case PCI_ERS_RESULT_NONE: + case PCI_ERS_RESULT_CAN_RECOVER: + envp[idx++] = "ERROR_EVENT=BEGIN_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + case PCI_ERS_RESULT_RECOVERED: + envp[idx++] = "ERROR_EVENT=SUCCESSFUL_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=1"; + break; + case PCI_ERS_RESULT_DISCONNECT: + envp[idx++] = "ERROR_EVENT=FAILED_RECOVERY"; + envp[idx++] = "DEVICE_ONLINE=0"; + break; + default: + break; + } + + if (idx > 0) { + envp[idx++] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, envp); + } +} + /* provide the legacy pci_dma_* API */ #include -- cgit From 3df54c870f52b4c47b53eead8d22a109f741b91c Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Wed, 29 Nov 2017 10:55:24 -0700 Subject: ntb_hw_switchtec: Allow using Switchtec NTB in multi-partition setups Allow using Switchtec NTB in setups that have more than two partitions. Note: this does not enable having multi-host communication, it only allows for a single NTB link between two hosts in a network that might have more than two. Use following logic to determine the NT peer partition: 1) If there are 2 partitions, and the target vector is set in the Switchtec configuration, use the partition specified in target vector. 
2) If there are 2 partitions and target vector is unset use the only other partition as specified in the NT EP map. 3) If there are more than 2 partitions and target vector is set use the other partition specified in target vector. 4) If there are more than 2 partitions and target vector is unset, this is invalid and an error is reported. Signed-off-by: Kelvin Cao [logang@deltatee.com: commit message fleshed out] Signed-off-by: Logan Gunthorpe Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- include/linux/switchtec.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 09d73d0d1aa8..d4a7c18b42cf 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -168,6 +168,14 @@ struct ntb_info_regs { u16 reserved1; u64 ep_map; u16 requester_id; + u16 reserved2; + u32 reserved3[4]; + struct nt_partition_info { + u32 xlink_enabled; + u32 target_part_low; + u32 target_part_high; + u32 reserved; + } ntp_info[48]; } __packed; struct part_cfg_regs { -- cgit From 45f447deb29081a7df7b81f2cd9cc8121994d988 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:28 -0700 Subject: ntb_hw_switchtec: Expand PFF CSR registers The PFF CSR registers actually mirror the PCI configuration space for all the ports in the switch. Previously, this was not needed by the driver but will be used by the crosslink code to enumerate the bus in a host-less centre partition.
Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- include/linux/switchtec.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index d4a7c18b42cf..6d325a7a0c19 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -292,7 +292,20 @@ enum { struct pff_csr_regs { u16 vendor_id; u16 device_id; - u32 pci_cfg_header[15]; + u16 pcicmd; + u16 pcists; + u32 pci_class; + u32 pci_opts; + union { + u32 pci_bar[6]; + u64 pci_bar64[3]; + }; + u32 pci_cardbus; + u32 pci_subsystem_id; + u32 pci_expansion_rom; + u32 pci_cap_ptr; + u32 reserved1; + u32 pci_irq; u32 pci_cap_region[48]; u32 pcie_cap_region[448]; u32 indirect_gas_window[128]; -- cgit From 01752501820277d217a7b52548d9c948f98d2c56 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:29 -0700 Subject: ntb_hw_switchtec: Add initialization code for crosslink Crosslink is a feature of the Switchtec switches that is similar to the B2B mode of other NTB devices. It allows a system to be designed that is perfectly symmetric with two identical switches that link two hosts together. In order for the system to be symmetric, there is an empty host-less partition between the two switches which the host must enumerate and assign BAR addresses to. The firmware in the switch manages this specially so that the BAR addresses on both sides of the empty partition will be identical despite being in the same partition with the same address space. The driver determines whether crosslink is enabled by a flag set in the NTB partition info registers which are set by the switch's configuration file. When crosslink is enabled, a reserved LUT window is setup to point to the peer's switch's NTB registers and the local MWs are set to forward to the host-less partition's BARs. (Yes, this hurts my brain too.) 
Once this is setup, largely the same NTB infrastructure is used to communicate between the two hosts. Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- include/linux/ntb.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index c308964777eb..ea3be7275a5e 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -71,6 +71,7 @@ struct pci_dev; * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. * @NTB_TOPO_SWITCH: Connected via a switch which supports ntb. + * @NTB_TOPO_CROSSLINK: Connected via two symmetric switchecs */ enum ntb_topo { NTB_TOPO_NONE = -1, @@ -79,6 +80,7 @@ enum ntb_topo { NTB_TOPO_B2B_USD, NTB_TOPO_B2B_DSD, NTB_TOPO_SWITCH, + NTB_TOPO_CROSSLINK, }; static inline int ntb_topo_is_b2b(enum ntb_topo topo) @@ -94,12 +96,13 @@ static inline int ntb_topo_is_b2b(enum ntb_topo topo) static inline char *ntb_topo_string(enum ntb_topo topo) { switch (topo) { - case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; - case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; - case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; - case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; - case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; - case NTB_TOPO_SWITCH: return "NTB_TOPO_SWITCH"; + case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; + case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; + case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; + case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; + case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + case NTB_TOPO_SWITCH: return "NTB_TOPO_SWITCH"; + case NTB_TOPO_CROSSLINK: return "NTB_TOPO_CROSSLINK"; } return "NTB_TOPO_INVALID"; } -- cgit From b87ab21935d76922362ff98a5a78f16e2e956ead Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:52 +0300 Subject: NTB: Rename NTB messaging API methods There is a common methods signature form used over all the NTB 
API like functions naming scheme, arguments names and order, etc. Recently added NTB messaging API IO callbacks were named a bit different so should be renamed to be in compliance with the rest of the API. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index ea3be7275a5e..21b1826a0953 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -253,7 +253,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @msg_set_mask: See ntb_msg_set_mask(). * @msg_clear_mask: See ntb_msg_clear_mask(). * @msg_read: See ntb_msg_read(). - * @msg_write: See ntb_msg_write(). + * @peer_msg_write: See ntb_peer_msg_write(). */ struct ntb_dev_ops { int (*port_number)(struct ntb_dev *ntb); @@ -324,8 +324,8 @@ struct ntb_dev_ops { int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits); int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits); int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits); - int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg); - int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg); + u32 (*msg_read)(struct ntb_dev *ntb, int *pidx, int midx); + int (*peer_msg_write)(struct ntb_dev *ntb, int pidx, int midx, u32 msg); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -387,7 +387,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->msg_set_mask == !ops->msg_count && */ /* !ops->msg_clear_mask == !ops->msg_count && */ !ops->msg_read == !ops->msg_count && - !ops->msg_write == !ops->msg_count && + !ops->peer_msg_write == !ops->msg_count && 1; } @@ -1462,31 +1462,29 @@ static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits) } /** - * ntb_msg_read() - read message register with specified index + * ntb_msg_read() - read inbound message 
register with specified index * @ntb: NTB device context. - * @midx: Message register index * @pidx: OUT - Port index of peer device a message retrieved from - * @msg: OUT - Data + * @midx: Message register index * * Read data from the specified message register. Source port index of a * message is retrieved as well. * - * Return: Zero on success, otherwise a negative error number. + * Return: The value of the inbound message register. */ -static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, - u32 *msg) +static inline u32 ntb_msg_read(struct ntb_dev *ntb, int *pidx, int midx) { if (!ntb->ops->msg_read) - return -EINVAL; + return ~(u32)0; - return ntb->ops->msg_read(ntb, midx, pidx, msg); + return ntb->ops->msg_read(ntb, pidx, midx); } /** - * ntb_msg_write() - write data to the specified message register + * ntb_peer_msg_write() - write data to the specified peer message register * @ntb: NTB device context. - * @midx: Message register index * @pidx: Port index of peer device a message being sent to + * @midx: Message register index * @msg: Data to send * * Send data to a specified peer device using the defined message register. @@ -1495,13 +1493,13 @@ static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, * * Return: Zero on success, otherwise a negative error number. */ -static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, - u32 msg) +static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, + u32 msg) { - if (!ntb->ops->msg_write) + if (!ntb->ops->peer_msg_write) return -EINVAL; - return ntb->ops->msg_write(ntb, midx, pidx, msg); + return ntb->ops->peer_msg_write(ntb, pidx, midx, msg); } #endif -- cgit From f1678a4c66a5c2cb43f744f7dc7e048d59690166 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:54 +0300 Subject: NTB: Fix UB/bug in ntb_mw_get_align() Simple (1 << pidx) operation causes undefined behaviour when pidx >= 32. 
It must be cast to u64 to match the actual return value of the ntb_link_is_up() method, so as to have all the possible peer indexes covered and to get rid of the undefined behaviour. Additionally there are special macros in "linux/bitops.h" to perform the bit-set-shift operations, so it's recommended to have them used for proper bit setting. Signed-off-by: Serge Semin Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- include/linux/ntb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 21b1826a0953..181d16601dd9 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -767,7 +767,7 @@ static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx, resource_size_t *size_align, resource_size_t *size_max) { - if (!(ntb_link_is_up(ntb, NULL, NULL) & (1 << pidx))) + if (!(ntb_link_is_up(ntb, NULL, NULL) & BIT_ULL(pidx))) return -ENOTCONN; return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align, -- cgit From f3804203306e098dae9ca51540fcd5eb700d7f40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:02:22 -0800 Subject: array_index_nospec: Sanitize speculative array de-references array_index_nospec() is proposed as a generic mechanism to mitigate against Spectre-variant-1 attacks, i.e. an attack that bypasses boundary checks via speculative execution. The array_index_nospec() implementation is expected to be safe for current generation CPUs across multiple architectures (ARM, x86). Based on an original implementation by Linus Torvalds, tweaked to remove speculative flows by Alexei Starovoitov, and tweaked again by Linus to introduce an x86 assembly implementation for the mask generation.
Co-developed-by: Linus Torvalds Co-developed-by: Alexei Starovoitov Suggested-by: Cyril Novikov Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: Peter Zijlstra Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727414229.33451.18411580953862676575.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/nospec.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 include/linux/nospec.h (limited to 'include/linux') diff --git a/include/linux/nospec.h b/include/linux/nospec.h new file mode 100644 index 000000000000..b99bced39ac2 --- /dev/null +++ b/include/linux/nospec.h @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Linus Torvalds. All rights reserved. +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#ifndef _LINUX_NOSPEC_H +#define _LINUX_NOSPEC_H + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * When @index is out of bounds (@index >= @size), the sign bit will be + * set. Extend the sign bit to all bits and invert, giving a result of + * zero for an out of bounds index, or ~0 if within bounds [0, @size). + */ +#ifndef array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. 
+ */ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) + return 0; + + /* + * Always calculate and emit the mask even if the compiler + * thinks the mask is not needed. The compiler does not take + * into account the value of @index under speculation. + */ + OPTIMIZER_HIDE_VAR(index); + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); +} +#endif + +/* + * array_index_nospec - sanitize an array index after a bounds check + * + * For a code sequence like: + * + * if (index < size) { + * index = array_index_nospec(index, size); + * val = array[index]; + * } + * + * ...if the CPU speculates past the bounds check then + * array_index_nospec() will clamp the index within the range of [0, + * size). + */ +#define array_index_nospec(index, size) \ +({ \ + typeof(index) _i = (index); \ + typeof(size) _s = (size); \ + unsigned long _mask = array_index_mask_nospec(_i, _s); \ + \ + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ + \ + _i &= _mask; \ + _i; \ +}) +#endif /* _LINUX_NOSPEC_H */ -- cgit From 56c30ba7b348b90484969054d561f711ba196507 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 Jan 2018 17:03:05 -0800 Subject: vfs, fdtable: Prevent bounds-check bypass via speculative execution 'fd' is a user controlled value that is used as a data dependency to read from the 'fdt->fd' array. In order to avoid potential leaks of kernel memory values, block speculative execution of the instruction stream that could issue reads based on an invalid 'file *' returned from __fcheck_files. 
Co-developed-by: Elena Reshetova Signed-off-by: Dan Williams Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Al Viro Cc: torvalds@linux-foundation.org Cc: alan@linux.intel.com Link: https://lkml.kernel.org/r/151727418500.33451.17392199002892248656.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/fdtable.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 1c65817673db..41615f38bcff 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i { struct fdtable *fdt = rcu_dereference_raw(files->fdt); - if (fd < fdt->max_fds) + if (fd < fdt->max_fds) { + fd = array_index_nospec(fd, fdt->max_fds); return rcu_dereference_raw(fdt->fd[fd]); + } return NULL; } -- cgit From 2b16f048729bf35e6c28a40cbfad07239f9dcd90 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 31 Jan 2018 14:15:33 +1100 Subject: net: create skb_gso_validate_mac_len() If you take a GSO skb, and split it into packets, will the MAC length (L2 + L3 + L4 headers + payload) of those packets be small enough to fit within a given length? Move skb_gso_mac_seglen() to skbuff.h with other related functions like skb_gso_network_seglen() so we can use it, and then create skb_gso_validate_mac_len to do the full calculation. Signed-off-by: Daniel Axtens Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac89a93b7c83..5ebc0f869720 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3287,6 +3287,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); @@ -4120,6 +4121,21 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) return hdr_len + skb_gso_transport_seglen(skb); } +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). + */ +static inline unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + return hdr_len + skb_gso_transport_seglen(skb); +} + /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. -- cgit From 66f793099a636862a71c59d4a6ba91387b155e0c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 1 Feb 2018 11:27:20 +0000 Subject: x86/retpoline: Avoid retpolines for built-in __init functions There's no point in building init code with retpolines, since it runs before any potentially hostile userspace does. And before the retpoline is actually ALTERNATIVEd into place, for much of it. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: karahmed@amazon.de Cc: peterz@infradead.org Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1517484441-1420-2-git-send-email-dwmw@amazon.co.uk --- include/linux/init.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index ea1b31101d9e..506a98151131 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -5,6 +5,13 @@ #include #include +/* Built-in __init functions needn't be compiled with retpoline */ +#if defined(RETPOLINE) && !defined(MODULE) +#define __noretpoline __attribute__((indirect_branch("keep"))) +#else +#define __noretpoline +#endif + /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) * as `initialization' functions. The kernel can take this @@ -40,7 +47,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __latent_entropy +#define __init __section(.init.text) __cold __latent_entropy __noretpoline #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) -- cgit From 60f91826ca62bcf85d6d5fc90941337282787671 Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Tue, 24 Oct 2017 09:16:42 +0800 Subject: buffer: Avoid setting buffer bits that are already set It's expensive to set buffer flags that are already set, because that causes a costly cache line transition. A common case is setting the "verified" flag during ext4 writes. This patch checks for the flag being set first. With the AIM7/creat-clo benchmark testing on a 48G ramdisk based-on ext4 file system, we see 3.3%(15431->15936) improvement of aim7.jobs-per-min on a 2-sockets broadwell platform. 
What the benchmark does is: it forks 3000 processes, and each process does the following: a) open a new file b) close the file c) delete the file, repeating this 100*1000 times (loop=100*1000). The original patch was contributed by Andi Kleen. Signed-off-by: Andi Kleen Tested-by: Kemi Wang Signed-off-by: Kemi Wang Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8b1bf8d3d4a2..06797ef10fd9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -81,11 +81,14 @@ struct buffer_head { /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. + * To avoid reset buffer flags that are already set, because that causes + * a costly cache line transition, check the flag first. */ #define BUFFER_FNS(bit, name) \ static __always_inline void set_buffer_##name(struct buffer_head *bh) \ { \ - set_bit(BH_##bit, &(bh)->b_state); \ + if (!test_bit(BH_##bit, &(bh)->b_state)) \ + set_bit(BH_##bit, &(bh)->b_state); \ } \ static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ { \ -- cgit From 328008a72d38b5bde6491e463405c34a81a65d3e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 15:56:18 +0100 Subject: x86/power: Fix swsusp_arch_resume prototype The declaration for swsusp_arch_resume marks it as 'asmlinkage', but the definition in x86-32 does not, and it fails to include the header with the declaration.
This leads to a warning when building with link-time-optimizations: kernel/power/power.h:108:23: error: type of 'swsusp_arch_resume' does not match original declaration [-Werror=lto-type-mismatch] extern asmlinkage int swsusp_arch_resume(void); ^ arch/x86/power/hibernate_32.c:148:0: note: 'swsusp_arch_resume' was previously declared here int swsusp_arch_resume(void) This moves the declaration into a globally visible header file and fixes up both x86 definitions to match it. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Len Brown Cc: Andi Kleen Cc: Nicolas Pitre Cc: linux-pm@vger.kernel.org Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Bart Van Assche Link: https://lkml.kernel.org/r/20180202145634.200291-2-arnd@arndb.de --- include/linux/suspend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cc22a24516d6..440b62f7502e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -384,6 +384,8 @@ extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); extern unsigned long get_safe_page(gfp_t gfp_mask); +extern asmlinkage int swsusp_arch_suspend(void); +extern asmlinkage int swsusp_arch_resume(void); extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); -- cgit From 23c35f48f5fbe33f68904138b23fee64df7d2f0f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Feb 2018 16:44:14 -0800 Subject: pinctrl: remove include file from <linux/device.h> When pulling the recent pinctrl merge, I was surprised by how a pinctrl-only pull request ended up rebuilding basically the whole kernel. The reason for that ended up being that <linux/device.h> included <linux/pinctrl/devinfo.h>, so any change to that file ended up causing pretty much every driver out there to be rebuilt.
The reason for that was because 'struct device' has this in it: #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif but we already avoid header includes for these kinds of things in that header file, preferring to just use a forward-declaration of the structure instead. Exactly to avoid this kind of header dependency. Since some drivers seem to expect that header to come in automatically, move the include to <linux/pinctrl/pinctrl.h> instead. It might be better to just make the includes more targeted, but I'm not going to review every driver. It would definitely be good to have a tool for finding and minimizing header dependencies automatically - or at least help with them. Right now we almost certainly end up having way too many of these things, and it's hard to test every single configuration. FWIW, you can get a sense of the "hotness" of a header file with something like this after doing a full build: find . -name '.*.o.cmd' -print0 | xargs -0 tail --lines=+2 | grep -v 'wildcard ' | tr ' \\' '\n' | sort | uniq -c | sort -n | less -S which isn't exact (there are other things in those '*.o.cmd' than just the dependencies, and the "--lines=+2" only removes the header), but might be a useful approximation. With this patch, <linux/pinctrl/devinfo.h> drops to "only" having 833 users in the current x86-64 allmodconfig. In contrast, <linux/device.h> has 14857 build files including it directly or indirectly. Of course, the headers that absolutely _everybody_ includes (things like <linux/types.h> etc) get a score of 23000+.
Cc: Linus Walleij Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/device.h | 2 +- include/linux/pinctrl/pinctrl.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index f649fc0c2571..b093405ed525 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -41,6 +40,7 @@ struct fwnode_handle; struct iommu_ops; struct iommu_group; struct iommu_fwspec; +struct dev_pin_info; struct bus_attribute { struct attribute attr; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 5e45385c5bdc..8f5dbb84547a 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -18,6 +18,7 @@ #include #include #include +#include struct device; struct pinctrl_dev; -- cgit