diff options
Diffstat (limited to 'include')
115 files changed, 2901 insertions, 763 deletions
diff --git a/include/asm-generic/fprobe.h b/include/asm-generic/fprobe.h new file mode 100644 index 000000000000..8659a4dc6eb6 --- /dev/null +++ b/include/asm-generic/fprobe.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Generic arch dependent fprobe macros. + */ +#ifndef __ASM_GENERIC_FPROBE_H__ +#define __ASM_GENERIC_FPROBE_H__ + +#include <linux/bits.h> + +#ifdef CONFIG_64BIT +/* + * Encoding the size and the address of fprobe into one 64bit entry. + * The 32bit architectures should use 2 entries to store those info. + */ + +#define ARCH_DEFINE_ENCODE_FPROBE_HEADER + +#define FPROBE_HEADER_MSB_SIZE_SHIFT (BITS_PER_LONG - FPROBE_DATA_SIZE_BITS) +#define FPROBE_HEADER_MSB_MASK \ + GENMASK(FPROBE_HEADER_MSB_SIZE_SHIFT - 1, 0) + +/* + * By default, this expects the MSBs in the address of kprobe is 0xf. + * If any arch needs another fixed pattern (e.g. s390 is zero filled), + * override this. + */ +#define FPROBE_HEADER_MSB_PATTERN \ + GENMASK(BITS_PER_LONG - 1, FPROBE_HEADER_MSB_SIZE_SHIFT) + +#define arch_fprobe_header_encodable(fp) \ + (((unsigned long)(fp) & ~FPROBE_HEADER_MSB_MASK) == \ + FPROBE_HEADER_MSB_PATTERN) + +#define arch_encode_fprobe_header(fp, size) \ + (((unsigned long)(fp) & FPROBE_HEADER_MSB_MASK) | \ + ((unsigned long)(size) << FPROBE_HEADER_MSB_SIZE_SHIFT)) + +#define arch_decode_fprobe_header_size(val) \ + ((unsigned long)(val) >> FPROBE_HEADER_MSB_SIZE_SHIFT) + +#define arch_decode_fprobe_header_fp(val) \ + ((struct fprobe *)(((unsigned long)(val) & FPROBE_HEADER_MSB_MASK) | \ + FPROBE_HEADER_MSB_PATTERN)) +#endif /* CONFIG_64BIT */ + +#endif /* __ASM_GENERIC_FPROBE_H__ */ diff --git a/include/drm/bridge/dw_mipi_dsi2.h b/include/drm/bridge/dw_mipi_dsi2.h new file mode 100644 index 000000000000..c18c49379247 --- /dev/null +++ b/include/drm/bridge/dw_mipi_dsi2.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, Fuzhou Rockchip Electronics Co., Ltd + * + * Authors: Guochun Huang <hero.huang@rock-chips.com> + * Heiko Stuebner <heiko.stuebner@cherry.de> + */ + +#ifndef __DW_MIPI_DSI2__ +#define __DW_MIPI_DSI2__ + +#include <linux/regmap.h> +#include <linux/types.h> + +#include <drm/drm_atomic.h> +#include <drm/drm_bridge.h> +#include <drm/drm_connector.h> +#include <drm/drm_crtc.h> +#include <drm/drm_modes.h> + +struct drm_display_mode; +struct drm_encoder; +struct dw_mipi_dsi2; +struct mipi_dsi_device; +struct platform_device; + +enum dw_mipi_dsi2_phy_type { + DW_MIPI_DSI2_DPHY, + DW_MIPI_DSI2_CPHY, +}; + +struct dw_mipi_dsi2_phy_iface { + int ppi_width; + enum dw_mipi_dsi2_phy_type phy_type; +}; + +struct dw_mipi_dsi2_phy_timing { + u32 data_hs2lp; + u32 data_lp2hs; +}; + +struct dw_mipi_dsi2_phy_ops { + int (*init)(void *priv_data); + void (*power_on)(void *priv_data); + void (*power_off)(void *priv_data); + void (*get_interface)(void *priv_data, struct dw_mipi_dsi2_phy_iface *iface); + int (*get_lane_mbps)(void *priv_data, + const struct drm_display_mode *mode, + unsigned long mode_flags, u32 lanes, u32 format, + unsigned int *lane_mbps); + int (*get_timing)(void *priv_data, unsigned int lane_mbps, + struct dw_mipi_dsi2_phy_timing *timing); + int (*get_esc_clk_rate)(void *priv_data, unsigned int *esc_clk_rate); +}; + +struct dw_mipi_dsi2_host_ops { + int (*attach)(void *priv_data, + struct mipi_dsi_device *dsi); + int (*detach)(void *priv_data, + struct mipi_dsi_device *dsi); +}; + +struct dw_mipi_dsi2_plat_data { + struct regmap *regmap; + unsigned int max_data_lanes; + + enum drm_mode_status (*mode_valid)(void *priv_data, + const struct drm_display_mode *mode, + unsigned long mode_flags, + u32 lanes, u32 format); + + bool (*mode_fixup)(void *priv_data, const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + u32 *(*get_input_bus_fmts)(void *priv_data, + struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts); + + const struct dw_mipi_dsi2_phy_ops *phy_ops; + const struct dw_mipi_dsi2_host_ops *host_ops; + + void *priv_data; +}; + +struct dw_mipi_dsi2 *dw_mipi_dsi2_probe(struct platform_device *pdev, + const struct dw_mipi_dsi2_plat_data *plat_data); +void dw_mipi_dsi2_remove(struct dw_mipi_dsi2 *dsi2); +int dw_mipi_dsi2_bind(struct dw_mipi_dsi2 *dsi2, struct drm_encoder *encoder); +void dw_mipi_dsi2_unbind(struct dw_mipi_dsi2 *dsi2); + +#endif /* __DW_MIPI_DSI2__ */ diff --git a/include/drm/drm_client_setup.h b/include/drm/clients/drm_client_setup.h index 46aab3fb46be..46aab3fb46be 100644 --- a/include/drm/drm_client_setup.h +++ b/include/drm/clients/drm_client_setup.h diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 279624833ea9..8f4054a56039 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -567,6 +567,11 @@ int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy, u8 link_status[DP_LINK_STATUS_SIZE]); +int drm_dp_dpcd_write_payload(struct drm_dp_aux *aux, + int vcpid, u8 start_time_slot, u8 time_slot_count); +int drm_dp_dpcd_clear_payload(struct drm_dp_aux *aux); +int drm_dp_dpcd_poll_act_handled(struct drm_dp_aux *aux, int timeout_ms); + bool drm_dp_send_real_edid_checksum(struct drm_dp_aux *aux, u8 real_edid_checksum); diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index a80ba457a858..e39de161c938 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -867,8 +867,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port); -fixed20_12 drm_dp_get_vc_payload_bw(const struct drm_dp_mst_topology_mgr *mgr, - int link_rate, int link_lane_count); +fixed20_12 drm_dp_get_vc_payload_bw(int link_rate, int link_lane_count); int drm_dp_calc_pbn_mode(int clock, int bpp); diff --git a/include/drm/display/drm_hdmi_audio_helper.h b/include/drm/display/drm_hdmi_audio_helper.h new file mode 100644 index 000000000000..c9a6faef4109 --- /dev/null +++ b/include/drm/display/drm_hdmi_audio_helper.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_DISPLAY_HDMI_AUDIO_HELPER_H_ +#define DRM_DISPLAY_HDMI_AUDIO_HELPER_H_ + +#include <linux/types.h> + +struct drm_connector; +struct drm_connector_hdmi_audio_funcs; + +struct device; + +int drm_connector_hdmi_audio_init(struct drm_connector *connector, + struct device *hdmi_codec_dev, + const struct drm_connector_hdmi_audio_funcs *funcs, + unsigned int max_i2s_playback_channels, + bool spdif_playback, + int sound_dai_port); +void drm_connector_hdmi_audio_plugged_notify(struct drm_connector *connector, + bool plugged); + +#endif diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index 2d45fcfa4619..44ec5c4a7503 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -6,8 +6,11 @@ struct drm_atomic_state; struct drm_connector; struct drm_connector_state; +struct drm_display_mode; struct hdmi_audio_infoframe; +enum drm_connector_status; + void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, struct drm_connector_state *new_conn_state); @@ -19,5 +22,12 @@ int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector int drm_atomic_helper_connector_hdmi_clear_audio_infoframe(struct drm_connector *connector); int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, struct drm_atomic_state *state); +void drm_atomic_helper_connector_hdmi_hotplug(struct drm_connector *connector, + enum drm_connector_status status); +void drm_atomic_helper_connector_hdmi_force(struct drm_connector *connector); + +enum drm_mode_status +drm_hdmi_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode); #endif // DRM_HDMI_STATE_HELPER_H_ diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index e8d735b7f6a4..496dbbd2ad7e 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -41,6 +41,8 @@ struct drm_display_info; struct drm_minor; struct drm_panel; struct edid; +struct hdmi_codec_daifmt; +struct hdmi_codec_params; struct i2c_adapter; /** @@ -677,6 +679,57 @@ struct drm_bridge_funcs { const u8 *buffer, size_t len); /** + * @hdmi_audio_startup: + * + * Called when ASoC starts an audio stream setup. The + * @hdmi_audio_startup() is optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*hdmi_audio_startup)(struct drm_connector *connector, + struct drm_bridge *bridge); + + /** + * @hdmi_audio_prepare: + * Configures HDMI-encoder for audio stream. Can be called multiple + * times for each setup. Mandatory if HDMI audio is enabled in the + * bridge's configuration. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*hdmi_audio_prepare)(struct drm_connector *connector, + struct drm_bridge *bridge, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms); + + /** + * @hdmi_audio_shutdown: + * + * Shut down the audio stream. Mandatory if HDMI audio is enabled in + * the bridge's configuration. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + void (*hdmi_audio_shutdown)(struct drm_connector *connector, + struct drm_bridge *bridge); + + /** + * @hdmi_audio_mute_stream: + * + * Mute/unmute HDMI audio stream. The @hdmi_audio_mute_stream callback + * is optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*hdmi_audio_mute_stream)(struct drm_connector *connector, + struct drm_bridge *bridge, + bool enable, int direction); + + /** * @debugfs_init: * * Allows bridges to create bridge-specific debugfs files. @@ -859,6 +912,27 @@ struct drm_bridge { * @DRM_BRIDGE_OP_HDMI is set. */ unsigned int max_bpc; + + /** + * @hdmi_audio_dev: device to be used as a parent for the HDMI Codec + */ + struct device *hdmi_audio_dev; + + /** + * @hdmi_audio_max_i2s_playback_channels: maximum number of playback + * I2S channels for the HDMI codec + */ + int hdmi_audio_max_i2s_playback_channels; + + /** + * @hdmi_audio_spdif_playback: set if HDMI codec has S/PDIF playback port + */ + unsigned int hdmi_audio_spdif_playback : 1; + + /** + * @hdmi_audio_dai_port: sound DAI port, -1 if it is not enabled + */ + int hdmi_audio_dai_port; }; static inline struct drm_bridge * diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e3fa43291f44..f13d597370a3 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -45,7 +45,10 @@ struct drm_property; struct drm_property_blob; struct drm_printer; struct drm_privacy_screen; +struct drm_edid; struct edid; +struct hdmi_codec_daifmt; +struct hdmi_codec_params; struct i2c_adapter; enum drm_connector_force { @@ -1141,6 +1144,53 @@ struct drm_connector_state { struct drm_connector_hdmi_state hdmi; }; +struct drm_connector_hdmi_audio_funcs { + /** + * @startup: + * + * Called when ASoC starts an audio stream setup. The + * @startup() is optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*startup)(struct drm_connector *connector); + + /** + * @prepare: + * Configures HDMI-encoder for audio stream. Can be called + * multiple times for each setup. Mandatory. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*prepare)(struct drm_connector *connector, + struct hdmi_codec_daifmt *fmt, + struct hdmi_codec_params *hparms); + + /** + * @shutdown: + * + * Shut down the audio stream. Mandatory. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + void (*shutdown)(struct drm_connector *connector); + + /** + * @mute_stream: + * + * Mute/unmute HDMI audio stream. The @mute_stream callback is + * optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*mute_stream)(struct drm_connector *connector, + bool enable, int direction); +}; + /** * struct drm_connector_hdmi_funcs - drm_hdmi_connector control functions */ @@ -1198,6 +1248,21 @@ struct drm_connector_hdmi_funcs { int (*write_infoframe)(struct drm_connector *connector, enum hdmi_infoframe_type type, const u8 *buffer, size_t len); + + /** + * @read_edid: + * + * This callback is used by the framework as a replacement for reading + * the EDID from connector->ddc. It is still recommended to provide + * connector->ddc instead of implementing this callback. Returned EDID + * should be freed via the drm_edid_free(). + * + * The @read_edid callback is optional. + * + * Returns: + * Valid EDID on success, NULL in case of failure. + */ + const struct drm_edid *(*read_edid)(struct drm_connector *connector); }; /** @@ -1660,6 +1725,68 @@ struct drm_cmdline_mode { bool tv_mode_specified; }; +/** + * struct drm_connector_hdmi_audio - DRM gemeric HDMI Codec-related structure + * + * HDMI drivers usually incorporate a HDMI Codec. This structure expresses the + * generic HDMI Codec as used by the DRM HDMI Codec framework. + */ +struct drm_connector_hdmi_audio { + /** + * @funcs: + * + * Implementation of the HDMI codec functionality to be used by the DRM + * HDMI Codec framework. + */ + const struct drm_connector_hdmi_audio_funcs *funcs; + + /** + * @codec_pdev: + * + * Platform device created to hold the HDMI Codec. It will be + * automatically unregistered during drm_connector_cleanup(). + */ + struct platform_device *codec_pdev; + + /** + * @lock: + * + * Mutex to protect @last_state, @plugged_cb and @plugged_cb_dev. + */ + struct mutex lock; + + /** + * @plugged_cb: + * + * Callback to be called when the HDMI sink get plugged to or unplugged + * from this connector. This is assigned by the framework when + * requested by the ASoC code. + */ + void (*plugged_cb)(struct device *dev, bool plugged); + + /** + * @plugged_cb_dev: + * + * The data for @plugged_cb(). It is being provided by the ASoC. + */ + struct device *plugged_cb_dev; + + /** + * @last_state: + * + * Last plugged state recored by the framework. It is used to correctly + * report the state to @plugged_cb(). + */ + bool last_state; + + /** + * @dai_port: + * + * The port in DT that is used for the Codec DAI. + */ + int dai_port; +}; + /* * struct drm_connector_hdmi - DRM Connector HDMI-related structure */ @@ -2001,8 +2128,11 @@ struct drm_connector { struct drm_encoder *encoder; #define MAX_ELD_BYTES 128 - /** @eld: EDID-like data, if present */ + /** @eld: EDID-like data, if present, protected by @eld_mutex */ uint8_t eld[MAX_ELD_BYTES]; + /** @eld_mutex: protection for concurrenct access to @eld */ + struct mutex eld_mutex; + /** @latency_present: AV delay info from ELD, if found */ bool latency_present[2]; /** @@ -2118,6 +2248,11 @@ struct drm_connector { * @hdmi: HDMI-related variable and properties. */ struct drm_connector_hdmi hdmi; + + /** + * @hdmi_audio: HDMI codec properties and non-DRM state. + */ + struct drm_connector_hdmi_audio hdmi_audio; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) @@ -2126,6 +2261,11 @@ int drm_connector_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type); +int drm_connector_dynamic_init(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc); int drm_connector_init_with_ddc(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, @@ -2147,6 +2287,7 @@ int drmm_connector_hdmi_init(struct drm_device *dev, unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); +int drm_connector_dynamic_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); int drm_connector_attach_encoder(struct drm_connector *connector, struct drm_encoder *encoder); diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 1bbbcb8e2d23..9952b846c170 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -34,6 +34,7 @@ #include <drm/drm_device.h> +struct dmem_cgroup_region; struct drm_fb_helper; struct drm_fb_helper_surface_size; struct drm_file; @@ -401,8 +402,6 @@ struct drm_driver { char *name; /** @desc: driver description */ char *desc; - /** @date: driver date, unused, to be removed */ - char *date; /** * @driver_features: @@ -438,6 +437,10 @@ void *__devm_drm_dev_alloc(struct device *parent, const struct drm_driver *driver, size_t size, size_t offset); +struct dmem_cgroup_region * +drmm_cgroup_register_region(struct drm_device *dev, + const char *region_name, u64 size); + /** * devm_drm_dev_alloc - Resource managed allocation of a &drm_device instance * @parent: Parent device object diff --git a/include/drm/drm_fbdev_client.h b/include/drm/drm_fbdev_client.h deleted file mode 100644 index e11a5614f127..000000000000 --- a/include/drm/drm_fbdev_client.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -#ifndef DRM_FBDEV_CLIENT_H -#define DRM_FBDEV_CLIENT_H - -struct drm_device; -struct drm_format_info; - -#ifdef CONFIG_DRM_FBDEV_EMULATION -int drm_fbdev_client_setup(struct drm_device *dev, const struct drm_format_info *format); -#else -static inline int drm_fbdev_client_setup(struct drm_device *dev, - const struct drm_format_info *format) -{ - return 0; -} -#endif - -#endif diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index f0ef32e9fa5e..ef817926cddd 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -494,6 +494,7 @@ struct drm_memory_stats { enum drm_gem_object_status; +int drm_memory_stats_is_zero(const struct drm_memory_stats *stats); void drm_print_memory_stats(struct drm_printer *p, const struct drm_memory_stats *stats, enum drm_gem_object_status supported_status, diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 5b8b1b059d32..fdae947682cd 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -48,19 +48,21 @@ struct drm_gem_object; * enum drm_gem_object_status - bitmask of object state for fdinfo reporting * @DRM_GEM_OBJECT_RESIDENT: object is resident in memory (ie. not unpinned) * @DRM_GEM_OBJECT_PURGEABLE: object marked as purgeable by userspace + * @DRM_GEM_OBJECT_ACTIVE: object is currently used by an active submission * * Bitmask of status used for fdinfo memory stats, see &drm_gem_object_funcs.status - * and drm_show_fdinfo(). Note that an object can DRM_GEM_OBJECT_PURGEABLE if - * it still active or not resident, in which case drm_show_fdinfo() will not + * and drm_show_fdinfo(). Note that an object can report DRM_GEM_OBJECT_PURGEABLE + * and be active or not resident, in which case drm_show_fdinfo() will not * account for it as purgeable. So drivers do not need to check if the buffer - * is idle and resident to return this bit. (Ie. userspace can mark a buffer - * as purgeable even while it is still busy on the GPU.. it does not _actually_ - * become puregeable until it becomes idle. The status gem object func does - * not need to consider this.) + * is idle and resident to return this bit, i.e. userspace can mark a buffer as + * purgeable even while it is still busy on the GPU. It will not get reported in + * the puregeable stats until it becomes idle. The status gem object func does + * not need to consider this. */ enum drm_gem_object_status { DRM_GEM_OBJECT_RESIDENT = BIT(0), DRM_GEM_OBJECT_PURGEABLE = BIT(1), + DRM_GEM_OBJECT_ACTIVE = BIT(2), }; /** diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index b3906dc04388..f77fe1531cf8 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -199,6 +199,8 @@ void drm_puts(struct drm_printer *p, const char *str); void drm_print_regset32(struct drm_printer *p, struct debugfs_regset32 *regset); void drm_print_bits(struct drm_printer *p, unsigned long value, const char * const bits[], unsigned int nbits); +void drm_print_hex_dump(struct drm_printer *p, const char *prefix, + const u8 *buf, size_t len); __printf(2, 0) /** diff --git a/include/drm/drm_utils.h b/include/drm/drm_utils.h index 70775748d243..15fa9b6865f4 100644 --- a/include/drm/drm_utils.h +++ b/include/drm/drm_utils.h @@ -12,8 +12,12 @@ #include <linux/types.h> +struct drm_edid; + int drm_get_panel_orientation_quirk(int width, int height); +int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid); + signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec); #endif diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 32480b5563db..77c826589ec1 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -717,37 +717,66 @@ MACRO__(0xA7AB, ## __VA_ARGS__) /* DG2 */ -#define INTEL_DG2_G10_IDS(MACRO__, ...) \ - MACRO__(0x5690, ## __VA_ARGS__), \ - MACRO__(0x5691, ## __VA_ARGS__), \ - MACRO__(0x5692, ## __VA_ARGS__), \ +#define INTEL_DG2_G10_D_IDS(MACRO__, ...) \ MACRO__(0x56A0, ## __VA_ARGS__), \ MACRO__(0x56A1, ## __VA_ARGS__), \ - MACRO__(0x56A2, ## __VA_ARGS__), \ + MACRO__(0x56A2, ## __VA_ARGS__) + +#define INTEL_DG2_G10_E_IDS(MACRO__, ...) \ MACRO__(0x56BE, ## __VA_ARGS__), \ MACRO__(0x56BF, ## __VA_ARGS__) -#define INTEL_DG2_G11_IDS(MACRO__, ...) \ - MACRO__(0x5693, ## __VA_ARGS__), \ - MACRO__(0x5694, ## __VA_ARGS__), \ - MACRO__(0x5695, ## __VA_ARGS__), \ +#define INTEL_DG2_G10_M_IDS(MACRO__, ...) \ + MACRO__(0x5690, ## __VA_ARGS__), \ + MACRO__(0x5691, ## __VA_ARGS__), \ + MACRO__(0x5692, ## __VA_ARGS__) + +#define INTEL_DG2_G10_IDS(MACRO__, ...) \ + INTEL_DG2_G10_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G10_E_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G10_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_DG2_G11_D_IDS(MACRO__, ...) \ MACRO__(0x56A5, ## __VA_ARGS__), \ MACRO__(0x56A6, ## __VA_ARGS__), \ MACRO__(0x56B0, ## __VA_ARGS__), \ - MACRO__(0x56B1, ## __VA_ARGS__), \ + MACRO__(0x56B1, ## __VA_ARGS__) + +#define INTEL_DG2_G11_E_IDS(MACRO__, ...) \ MACRO__(0x56BA, ## __VA_ARGS__), \ MACRO__(0x56BB, ## __VA_ARGS__), \ MACRO__(0x56BC, ## __VA_ARGS__), \ MACRO__(0x56BD, ## __VA_ARGS__) -#define INTEL_DG2_G12_IDS(MACRO__, ...) \ - MACRO__(0x5696, ## __VA_ARGS__), \ - MACRO__(0x5697, ## __VA_ARGS__), \ +#define INTEL_DG2_G11_M_IDS(MACRO__, ...) \ + MACRO__(0x5693, ## __VA_ARGS__), \ + MACRO__(0x5694, ## __VA_ARGS__), \ + MACRO__(0x5695, ## __VA_ARGS__) + +#define INTEL_DG2_G11_IDS(MACRO__, ...) \ + INTEL_DG2_G11_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G11_E_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G11_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_DG2_G12_D_IDS(MACRO__, ...) \ MACRO__(0x56A3, ## __VA_ARGS__), \ MACRO__(0x56A4, ## __VA_ARGS__), \ MACRO__(0x56B2, ## __VA_ARGS__), \ MACRO__(0x56B3, ## __VA_ARGS__) +#define INTEL_DG2_G12_M_IDS(MACRO__, ...) \ + MACRO__(0x5696, ## __VA_ARGS__), \ + MACRO__(0x5697, ## __VA_ARGS__) + +#define INTEL_DG2_G12_IDS(MACRO__, ...) \ + INTEL_DG2_G12_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G12_M_IDS(MACRO__, ## __VA_ARGS__) + +#define INTEL_DG2_D_IDS(MACRO__, ...) \ + INTEL_DG2_G10_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G11_D_IDS(MACRO__, ## __VA_ARGS__), \ + INTEL_DG2_G12_D_IDS(MACRO__, ## __VA_ARGS__) + #define INTEL_DG2_IDS(MACRO__, ...) \ INTEL_DG2_G10_IDS(MACRO__, ## __VA_ARGS__), \ INTEL_DG2_G11_IDS(MACRO__, ## __VA_ARGS__), \ @@ -829,6 +858,7 @@ MACRO__(0xB092, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ MACRO__(0xB0A1, ## __VA_ARGS__), \ - MACRO__(0xB0A2, ## __VA_ARGS__) + MACRO__(0xB0A2, ## __VA_ARGS__), \ + MACRO__(0xB0B0, ## __VA_ARGS__) #endif /* __PCIIDS_H__ */ diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 5804408815be..8ea11cd8df39 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, struct ttm_operation_ctx *ctx); +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, + void *buf, int len, int write); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index be034be56ba1..ee688d0c029b 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -38,6 +38,7 @@ #define TTM_MAX_BO_PRIORITY 4U #define TTM_NUM_MEM_TYPES 8 +struct dmem_cgroup_device; struct ttm_device; struct ttm_resource_manager; struct ttm_resource; @@ -211,6 +212,11 @@ struct ttm_resource_manager { * bdev->lru_lock. */ uint64_t usage; + + /** + * @cg: &dmem_cgroup_region used for memory accounting, if not NULL. + */ + struct dmem_cgroup_region *cg; }; /** @@ -239,6 +245,7 @@ struct ttm_bus_placement { * @placement: Placement flags. * @bus: Placement on io bus accessible to the CPU * @bo: weak reference to the BO, protected by ttm_device::lru_lock + * @css: cgroup state this resource is charged to * * Structure indicating the placement and space resources used by a * buffer object. @@ -251,6 +258,8 @@ struct ttm_resource { struct ttm_bus_placement bus; struct ttm_buffer_object *bo; + struct dmem_cgroup_pool_state *css; + /** * @lru: Least recently used list, see &ttm_resource_manager.lru */ @@ -432,7 +441,8 @@ void ttm_resource_fini(struct ttm_resource_manager *man, int ttm_resource_alloc(struct ttm_buffer_object *bo, const struct ttm_place *place, - struct ttm_resource **res); + struct ttm_resource **res, + struct dmem_cgroup_pool_state **ret_limit_pool); void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res); bool ttm_resource_intersects(struct ttm_device *bdev, struct ttm_resource *res, diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e6c00e860951..3305c849abd6 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -42,7 +42,9 @@ struct linux_binprm { * Set when errors can no longer be returned to the * original userspace. */ - point_of_no_return:1; + point_of_no_return:1, + /* Set when "comm" must come from the dentry. */ + comm_from_dentry:1; struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index dbf0f74c1529..802f52e38efd 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -7,10 +7,12 @@ enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ - BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ - BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ - BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ - BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */ + BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */ + BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */ + BIP_CHECK_GUARD = 1 << 5, /* guard check */ + BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ + BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ }; struct bio_integrity_payload { @@ -21,6 +23,7 @@ struct bio_integrity_payload { unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ + u16 app_tag; /* application tag value */ struct bvec_iter bio_iter; /* for rewinding parent bio */ @@ -30,6 +33,9 @@ struct bio_integrity_payload { struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; +#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \ + BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) + #ifdef CONFIG_BLK_DEV_INTEGRITY #define bip_for_each_vec(bvl, bip, iter) \ @@ -72,7 +78,8 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len); +int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter); +int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); @@ -98,8 +105,12 @@ static inline void bioset_integrity_free(struct bio_set *bs) { } -static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len) +static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) +{ + return -EINVAL; +} + +static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta) { return -EINVAL; } diff --git a/include/linux/bio.h b/include/linux/bio.h index 7a1b3b1a8fed..4b79bf50f4f0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -19,9 +19,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) return min(nr_segs, BIO_MAX_VECS); } -#define bio_prio(bio) (bio)->bi_ioprio -#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) - #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) @@ -416,8 +413,6 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); -extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, - unsigned int, unsigned int); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h deleted file mode 100644 index ca544e1d3508..000000000000 --- a/include/linux/blk-mq-pci.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_PCI_H -#define _LINUX_BLK_MQ_PCI_H - -struct blk_mq_queue_map; -struct pci_dev; - -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); - -#endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h deleted file mode 100644 index 13226e9b22dd..000000000000 --- a/include/linux/blk-mq-virtio.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_VIRTIO_H -#define _LINUX_BLK_MQ_VIRTIO_H - -struct blk_mq_queue_map; -struct virtio_device; - -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec); - -#endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c596e0e4cb75..a0a9007cc1e3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -296,13 +296,6 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, }; -/* Keep alloc_policy_name[] in sync with the definitions below */ -enum { - BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */ - BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */ - BLK_TAG_ALLOC_MAX -}; - /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device @@ -668,7 +661,6 @@ struct blk_mq_ops { /* Keep hctx_flag_name[] in sync with the definitions below */ enum { - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for @@ -677,23 +669,20 @@ enum { BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 4, - /* Do not allow an I/O scheduler to be configured. */ - BLK_MQ_F_NO_SCHED = 1 << 5, + + /* + * Alloc tags on a round-robin base instead of the first available one. + */ + BLK_MQ_F_TAG_RR = 1 << 5, /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. */ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, - BLK_MQ_F_ALLOC_POLICY_START_BIT = 7, - BLK_MQ_F_ALLOC_POLICY_BITS = 1, + + BLK_MQ_F_MAX = 1 << 7, }; -#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ - ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ - ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) -#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ - ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ - << BLK_MQ_F_ALLOC_POLICY_START_BIT) #define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) @@ -921,6 +910,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); @@ -977,14 +968,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } -static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, - unsigned int nr_segs) -{ - rq->nr_phys_segments = nr_segs; - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; -} - void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 378d3a1a22fc..76f0a4e7c2e5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -331,8 +331,8 @@ typedef unsigned int __bitwise blk_features_t; #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) -/* stacked device can/does support atomic writes */ -#define BLK_FEAT_ATOMIC_WRITES_STACKED \ +/* atomic writes enabled */ +#define BLK_FEAT_ATOMIC_WRITES \ ((__force blk_features_t)(1u << 16)) /* @@ -581,6 +581,12 @@ struct request_queue { #ifdef CONFIG_LOCKDEP struct task_struct *mq_freeze_owner; int mq_freeze_owner_depth; + /* + * Records disk & queue state in current context, used in unfreeze + * queue + */ + bool mq_freeze_disk_dead; + bool mq_freeze_queue_dying; #endif wait_queue_head_t mq_freeze_wq; /* @@ -938,8 +944,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset, * the caller can modify. The caller must call queue_limits_commit_update() * to finish the update. * - * Context: process context. The caller must have frozen the queue or ensured - * that there is outstanding I/O by other means. + * Context: process context. */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) @@ -947,6 +952,8 @@ queue_limits_start_update(struct request_queue *q) mutex_lock(&q->limits_lock); return q->limits; } +int queue_limits_commit_update_frozen(struct request_queue *q, + struct queue_limits *lim); int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim); int queue_limits_set(struct request_queue *q, struct queue_limits *lim); @@ -1699,6 +1706,15 @@ struct io_comp_batch { void (*complete)(struct io_comp_batch *); }; +static inline bool blk_atomic_write_start_sect_aligned(sector_t sector, + struct queue_limits *limits) +{ + unsigned int alignment = max(limits->atomic_write_hw_unit_min, + limits->atomic_write_hw_boundary); + + return IS_ALIGNED(sector, alignment >> SECTOR_SHIFT); +} + static inline bool bdev_can_atomic_write(struct block_device *bdev) { struct request_queue *bd_queue = bdev->bd_queue; @@ -1707,15 +1723,9 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev) if (!limits->atomic_write_unit_min) return false; - if (bdev_is_partition(bdev)) { - sector_t bd_start_sect = bdev->bd_start_sect; - unsigned int alignment = - max(limits->atomic_write_unit_min, - limits->atomic_write_hw_boundary); - - if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT)) - return false; - } + if (bdev_is_partition(bdev)) + return blk_atomic_write_start_sect_aligned(bdev->bd_start_sect, + limits); return true; } diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f41c7f0ef91e..ba8f52d48b94 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -286,12 +286,7 @@ static inline void *bvec_virt(struct bio_vec *bvec) */ static inline phys_addr_t bvec_phys(const struct bio_vec *bvec) { - /* - * Note this open codes page_to_phys because page_to_phys is defined in - * <asm/io.h>, which we don't want to pull in here. If it ever moves to - * a sensible place we should start using it. - */ - return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset; + return page_to_phys(bvec->bv_page) + bvec->bv_offset; } #endif /* __LINUX_BVEC_H */ diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b4430634..0bf7d33a1048 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -82,6 +82,14 @@ enum cc_attr { CC_ATTR_GUEST_SEV_SNP, /** + * @CC_ATTR_GUEST_SNP_SECURE_TSC: SNP Secure TSC is active. + * + * The platform/OS is running as a guest/virtual machine and actively + * using AMD SEV-SNP Secure TSC feature. + */ + CC_ATTR_GUEST_SNP_SECURE_TSC, + + /** * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. * * The host kernel is running with the necessary features diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h new file mode 100644 index 000000000000..dd4869f1d736 --- /dev/null +++ b/include/linux/cgroup_dmem.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _CGROUP_DMEM_H +#define _CGROUP_DMEM_H + +#include <linux/types.h> +#include <linux/llist.h> + +struct dmem_cgroup_pool_state; + +/* Opaque definition of a cgroup region, used internally */ +struct dmem_cgroup_region; + +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) __printf(2,3); +void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region); +int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool); +void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size); +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low); + +void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool); +#else +static inline __printf(2,3) struct dmem_cgroup_region * +dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) +{ + return NULL; +} + +static inline void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) +{ } + +static inline int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool) +{ + *ret_pool = NULL; + + if (ret_limit_pool) + *ret_limit_pool = NULL; + + return 0; +} + +static inline void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size) +{ } + +static inline +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low) +{ + return true; +} + +static inline void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool) +{ } + +#endif +#endif /* _CGROUP_DMEM_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 445235487230..3fd0bcbf3080 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -65,6 +65,10 @@ SUBSYS(rdma) SUBSYS(misc) #endif +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +SUBSYS(dmem) +#endif + /* * The following subsystems are not supported on the default hierarchy. */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d0ed9583743f..c9b58188ec61 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -52,18 +52,6 @@ */ #define barrier_before_unreachable() asm volatile("") -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - */ -#define unreachable() \ - do { \ - annotate_unreachable(); \ - barrier_before_unreachable(); \ - __builtin_unreachable(); \ - } while (0) - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 240c632c5b95..efd43df3a99a 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -109,44 +109,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_OBJTOOL -/* - * These macros help objtool understand GCC code flow for unreachable code. - * The __COUNTER__ based labels are a hack to make each instance of the macros - * unique, to convince GCC not to merge duplicate inline asm statements. - */ -#define __stringify_label(n) #n - -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - -#define __annotate_unreachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_unreachable() __annotate_unreachable(__COUNTER__) - /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") - #else /* !CONFIG_OBJTOOL */ -#define annotate_reachable() -#define annotate_unreachable() #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ -#ifndef unreachable -# define unreachable() do { \ - annotate_unreachable(); \ +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + */ +#define unreachable() do { \ + barrier_before_unreachable(); \ __builtin_unreachable(); \ } while (0) -#endif /* * KENTRY - kernel entry point diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 45e598fe3476..77e6e195d1d6 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -52,8 +52,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define __COREDUMP_PRINTK(Level, Format, ...) \ do { \ char comm[TASK_COMM_LEN]; \ - \ - get_task_comm(comm, current); \ + /* This will always be NUL terminated. */ \ + memcpy(comm, current->comm, sizeof(comm)); \ printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \ task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \ } while (0) \ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a04b73c40173..6cc5e484547c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -240,6 +240,7 @@ enum cpuhp_state { CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, + CPUHP_AP_KTHREADS_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40, diff --git a/include/linux/cred.h b/include/linux/cred.h index e4a3155fe409..0c3c4b16b469 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *); -extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); @@ -172,12 +170,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } -/* - * Override creds without bumping reference count. Caller must ensure - * reference remains valid or has taken reference. Almost always not the - * interface you want. Use override_creds()/revert_creds() instead. - */ -static inline const struct cred *override_creds_light(const struct cred *override_cred) +static inline const struct cred *override_creds(const struct cred *override_cred) { const struct cred *old = current->cred; @@ -185,35 +178,12 @@ static inline const struct cred *override_creds_light(const struct cred *overrid return old; } -static inline void revert_creds_light(const struct cred *revert_cred) -{ - rcu_assign_pointer(current->cred, revert_cred); -} - -/** - * get_new_cred_many - Get references on a new set of credentials - * @cred: The new credentials to reference - * @nr: Number of references to acquire - * - * Get references on the specified set of new credentials. The caller must - * release all acquired references. - */ -static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +static inline const struct cred *revert_creds(const struct cred *revert_cred) { - atomic_long_add(nr, &cred->usage); - return cred; -} + const struct cred *override_cred = current->cred; -/** - * get_new_cred - Get a reference on a new set of credentials - * @cred: The new credentials to reference - * - * Get a reference on the specified set of new credentials. The caller must - * release the reference. - */ -static inline struct cred *get_new_cred(struct cred *cred) -{ - return get_new_cred_many(cred, 1); + rcu_assign_pointer(current->cred, revert_cred); + return override_cred; } /** @@ -236,7 +206,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) if (!cred) return cred; nonconst_cred->non_rcu = 0; - return get_new_cred_many(nonconst_cred, nr); + atomic_long_add(nr, &nonconst_cred->usage); + return cred; } /* diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index cdc4757217f9..b18658bce2c3 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -48,6 +48,7 @@ struct fwnode_handle; * will never get called until they do. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. + * @irq_get_affinity: Get IRQ affinity mask for the device on this bus. * * @online: Called to put the device back online (after offlining it). * @offline: Called to put the device offline for hot-removal. May fail. @@ -87,6 +88,8 @@ struct bus_type { void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); void (*shutdown)(struct device *dev); + const struct cpumask *(*irq_get_affinity)(struct device *dev, + unsigned int irq_vec); int (*online)(struct device *dev); int (*offline)(struct device *dev); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4cc8801e50e3..a087606ace19 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -3,6 +3,7 @@ #define LINUX_EXPORTFS_H 1 #include <linux/types.h> +#include <linux/path.h> struct dentry; struct iattr; @@ -156,6 +157,17 @@ struct fid { }; }; +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ @@ -225,6 +237,12 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * permission: + * Allow filesystems to specify a custom permission function. + * + * open: + * Allow filesystems to specify a custom open function. + * * commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * @@ -251,6 +269,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); + struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index c50882f19235..966092ffa89a 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -5,12 +5,18 @@ #include <uapi/linux/fiemap.h> #include <linux/fs.h> +/** + * struct fiemap_extent_info - fiemap request to a filesystem + * @fi_flags: Flags as passed from user + * @fi_extents_mapped: Number of mapped extents + * @fi_extents_max: Size of fiemap_extent array + * @fi_extents_start: Start of fiemap_extent array + */ struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ + unsigned int fi_flags; + unsigned int fi_extents_mapped; + unsigned int fi_extents_max; + struct fiemap_extent __user *fi_extents_start; }; int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 3abe614ef5f0..4d3f8074c137 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -37,16 +37,20 @@ struct folio_queue { #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif + unsigned int rreq_id; + unsigned int debug_id; }; /** * folioq_init - Initialise a folio queue segment * @folioq: The segment to initialise + * @rreq_id: The request identifier to use in tracelines. * - * Initialise a folio queue segment. Note that the folio pointers are - * left uninitialised. + * Initialise a folio queue segment and set an identifier to be used in traces. + * + * Note that the folio pointers are left uninitialised. */ -static inline void folioq_init(struct folio_queue *folioq) +static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) { folio_batch_init(&folioq->vec); folioq->next = NULL; @@ -54,6 +58,8 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->marks = 0; folioq->marks2 = 0; folioq->marks3 = 0; + folioq->rreq_id = rreq_id; + folioq->debug_id = 0; } /** diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index f39869588117..702099f08929 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -5,47 +5,68 @@ #include <linux/compiler.h> #include <linux/ftrace.h> -#include <linux/rethook.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> +#include <linux/slab.h> struct fprobe; - typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); /** + * struct fprobe_hlist_node - address based hash list node for fprobe. + * + * @hlist: The hlist node for address search hash table. + * @addr: One of the probing address of @fp. + * @fp: The fprobe which owns this. + */ +struct fprobe_hlist_node { + struct hlist_node hlist; + unsigned long addr; + struct fprobe *fp; +}; + +/** + * struct fprobe_hlist - hash list nodes for fprobe. + * + * @hlist: The hlist node for existence checking hash table. + * @rcu: rcu_head for RCU deferred release. + * @fp: The fprobe which owns this fprobe_hlist. + * @size: The size of @array. + * @array: The fprobe_hlist_node for each address to probe. + */ +struct fprobe_hlist { + struct hlist_node hlist; + struct rcu_head rcu; + struct fprobe *fp; + int size; + struct fprobe_hlist_node array[] __counted_by(size); +}; + +/** * struct fprobe - ftrace based probe. - * @ops: The ftrace_ops. + * * @nmissed: The counter for missing events. * @flags: The status flag. - * @rethook: The rethook data structure. (internal data) * @entry_data_size: The private data storage size. - * @nr_maxactive: The max number of active functions. * @entry_handler: The callback function for function entry. * @exit_handler: The callback function for function exit. + * @hlist_array: The fprobe_hlist for fprobe search from IP hash table. */ struct fprobe { -#ifdef CONFIG_FUNCTION_TRACER - /* - * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. - * But user of fprobe may keep embedding the struct fprobe on their own - * code. To avoid build error, this will keep the fprobe data structure - * defined here, but remove ftrace_ops data structure. - */ - struct ftrace_ops ops; -#endif unsigned long nmissed; unsigned int flags; - struct rethook *rethook; size_t entry_data_size; - int nr_maxactive; fprobe_entry_cb entry_handler; fprobe_exit_cb exit_handler; + + struct fprobe_hlist *hlist_array; }; /* This fprobe is soft-disabled. */ @@ -121,4 +142,9 @@ static inline void enable_fprobe(struct fprobe *fp) fp->flags &= ~FPROBE_FL_DISABLED; } +/* The entry data size is 4 bits (=16) * sizeof(long) in maximum */ +#define FPROBE_DATA_SIZE_BITS 4 +#define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1) +#define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long)) + #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e29433c5ecc..a4af70367f8a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -322,6 +322,7 @@ struct readahead_control; #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC +#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -348,6 +349,7 @@ struct readahead_control; #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. */ #define IOCB_AIO_RW (1 << 23) +#define IOCB_HAS_METADATA (1 << 24) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -356,7 +358,8 @@ struct readahead_control; { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ - { IOCB_ATOMIC, "ATOMIC"}, \ + { IOCB_ATOMIC, "ATOMIC" }, \ + { IOCB_DONTCACHE, "DONTCACHE" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ @@ -626,6 +629,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 +#define IOP_CACHED_LINK 0x0040 /* * Keep mostly read-only and often accessed (especially for @@ -723,7 +727,10 @@ struct inode { }; struct file_lock_context *i_flctx; struct address_space i_data; - struct list_head i_devices; + union { + struct list_head i_devices; + int i_linklen; + }; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; @@ -749,6 +756,13 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) +{ + inode->i_link = link; + inode->i_linklen = linklen; + inode->i_opflags |= IOP_CACHED_LINK; +} + /* * Get bit address from inode->i_state to use with wait_var_event() * infrastructre. @@ -2127,6 +2141,8 @@ struct file_operations { #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) +/* File system supports uncached read/write buffered IO */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -3351,7 +3367,7 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int readlink_copy(char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); @@ -3468,7 +3484,6 @@ struct offset_ctx { void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); -int simple_offset_empty(struct dentry *dentry); int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, @@ -3614,6 +3629,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } + if (flags & RWF_DONTCACHE) { + /* file system must support it */ + if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE)) + return -EOPNOTSUPP; + /* DAX mappings not supported */ + if (IS_DAX(ki->ki_filp->f_mapping->host)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 3cef566088fc..53e566efd5fd 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -84,6 +84,8 @@ extern int fs_lookup_param(struct fs_context *fc, extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); +extern const struct constant_table bool_names[]; + #ifdef CONFIG_VALIDATE_FS_PARSER extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, int low, int high, int special); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aa9ddd1e4bb6..07092dfb21a4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,8 @@ struct dyn_ftrace; char *arch_ftrace_match_adjust(char *str, const char *search); -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL -struct fgraph_ret_regs; -unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs); +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS +unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs); #else unsigned long ftrace_return_to_handler(unsigned long frame_pointer); #endif @@ -134,6 +133,13 @@ extern int ftrace_enabled; * Also, architecture dependent fields can be used for internal process. * (e.g. orig_ax on x86_64) * + * Basically, ftrace_regs stores the registers related to the context. + * On function entry, registers for function parameters and hooking the + * function call are stored, and on function exit, registers for function + * return value and frame pointers are stored. + * + * And also, it dpends on the context that which registers are restored + * from the ftrace_regs. * On the function entry, those registers will be restored except for * the stack pointer, so that user can change the function parameters * and instruction pointer (e.g. live patching.) @@ -170,6 +176,12 @@ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ +#ifdef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS + +static_assert(sizeof(struct pt_regs) == ftrace_regs_size()); + +#endif /* CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) @@ -178,6 +190,54 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ + defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) + +static __always_inline struct pt_regs * +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + /* + * If CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y, ftrace_regs memory + * layout is including pt_regs. So always returns that address. + * Since arch_ftrace_get_regs() will check some members and may return + * NULL, we can not use it. + */ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +/* + * Please define arch dependent pt_regs which compatible to the + * perf_arch_fetch_caller_regs() but based on ftrace_regs. + * This requires + * - user_mode(_regs) returns false (always kernel mode). + * - able to use the _regs for stack trace. + */ +#ifndef arch_ftrace_fill_perf_regs +/* As same as perf_arch_fetch_caller_regs(), do nothing by default */ +#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0) +#endif + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + arch_ftrace_fill_perf_regs(fregs, regs); + return regs; +} + +#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif + /* * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. @@ -190,6 +250,23 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } +#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API +static __always_inline unsigned long +ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth) +{ + unsigned long *stackp; + + stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs); + if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) == + ((unsigned long)stackp & ~(THREAD_SIZE - 1))) + return *(stackp + nth); + + return 0; +} +#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ +#define ftrace_regs_get_kernel_stack_nth(fregs, nth) (0L) +#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -545,6 +622,19 @@ enum { FTRACE_MAY_SLEEP = (1 << 5), }; +/* Arches can override ftrace_get_symaddr() to convert fentry_ip to symaddr. */ +#ifndef ftrace_get_symaddr +/** + * ftrace_get_symaddr - return the symbol address from fentry_ip + * @fentry_ip: the address of ftrace location + * + * Get the symbol address from @fentry_ip (fast path). If there is no fast + * search path, this returns 0. + * User may need to use kallsyms API to find the symbol address. + */ +#define ftrace_get_symaddr(fentry_ip) (0) +#endif + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); @@ -1069,12 +1159,15 @@ struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, - struct fgraph_ops *); /* return */ + struct fgraph_ops *, + struct ftrace_regs *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct ftrace_regs *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops); + struct fgraph_ops *gops, + struct ftrace_regs *fregs); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1114,8 +1207,15 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -function_graph_enter(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp); +function_graph_enter_regs(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp, + struct ftrace_regs *fregs); + +static inline int function_graph_enter(unsigned long ret, unsigned long func, + unsigned long fp, unsigned long *retp) +{ + return function_graph_enter_regs(ret, func, fp, retp, NULL); +} struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index be1ed0c891d0..bbc1873ca6b8 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -30,6 +30,8 @@ struct ftrace_regs; override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) +#define ftrace_regs_get_frame_pointer(fregs) \ + frame_pointer(&arch_ftrace_regs(fregs)->regs) #endif /* HAVE_ARCH_FTRACE_REGS */ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7ef5f7ef31a9..f7bfdcf0dda3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -386,6 +386,7 @@ extern void __init hrtimers_init(void); extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); +int hrtimers_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_cpu_dying(unsigned int cpu); #else diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index bc7babe91b2e..bf675a8aef8a 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -4,14 +4,14 @@ #ifdef CONFIG_NOINSTR_VALIDATION +#include <linux/objtool.h> #include <linux/stringify.h> /* Begin/end of an instrumentation safe region */ #define __instrumentation_begin(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_BEGIN(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -48,9 +48,8 @@ */ #define __instrumentation_end(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_END(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else /* !CONFIG_NOINSTR_VALIDATION */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fd4cdb0860a2..623d8e798a11 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -78,8 +78,9 @@ struct io_hash_table { struct io_mapped_region { struct page **pages; - void *vmap_ptr; - size_t nr_pages; + void *ptr; + unsigned nr_pages; + unsigned flags; }; /* @@ -293,6 +294,11 @@ struct io_ring_ctx { struct io_submit_state submit_state; + /* + * Modifications are protected by ->uring_lock and ->mmap_lock. + * The flags, buf_pages and buf_nr_pages fields should be stable + * once published. + */ struct xarray io_bl_xa; struct io_hash_table cancel_table; @@ -424,17 +430,10 @@ struct io_ring_ctx { * side will need to grab this lock, to prevent either side from * being run concurrently with the other. */ - struct mutex resize_lock; - - /* - * If IORING_SETUP_NO_MMAP is used, then the below holds - * the gup'ed pages for the two rings, and the sqes. - */ - unsigned short n_ring_pages; - unsigned short n_sqe_pages; - struct page **ring_pages; - struct page **sqe_pages; + struct mutex mmap_lock; + struct io_mapped_region sq_region; + struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; }; @@ -481,6 +480,7 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_HAS_METADATA_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -561,6 +561,8 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* request has read/write metadata assigned */ + REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); diff --git a/include/linux/irq.h b/include/linux/irq.h index fa711f80957b..8daa17f0107a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -64,7 +64,6 @@ enum irqchip_irq_state; * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) - * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. Exclude @@ -93,7 +92,6 @@ enum { IRQ_NOREQUEST = (1 << 11), IRQ_NOAUTOEN = (1 << 12), IRQ_NO_BALANCING = (1 << 13), - IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), @@ -105,7 +103,7 @@ enum { #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ - IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ + IRQ_NOAUTOEN | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) @@ -201,8 +199,6 @@ struct irq_data { * IRQD_LEVEL - Interrupt is level triggered * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup * from suspend - * IRQD_MOVE_PCNTXT - Interrupt can be moved in process - * context * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt @@ -233,7 +229,6 @@ enum { IRQD_AFFINITY_SET = BIT(12), IRQD_LEVEL = BIT(13), IRQD_WAKEUP_STATE = BIT(14), - IRQD_MOVE_PCNTXT = BIT(15), IRQD_IRQ_DISABLED = BIT(16), IRQD_IRQ_MASKED = BIT(17), IRQD_IRQ_INPROGRESS = BIT(18), @@ -338,11 +333,6 @@ static inline bool irqd_is_wakeup_set(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_STATE; } -static inline bool irqd_can_move_in_process_context(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; -} - static inline bool irqd_irq_disabled(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_DISABLED; @@ -567,6 +557,7 @@ struct irq_chip { * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup * IRQCHIP_IMMUTABLE: Don't ever change anything in this chip + * IRQCHIP_MOVE_DEFERRED: Move the interrupt in actual interrupt context */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -581,6 +572,7 @@ enum { IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), IRQCHIP_IMMUTABLE = (1 << 11), + IRQCHIP_MOVE_DEFERRED = (1 << 12), }; #include <linux/irqdesc.h> @@ -694,6 +686,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif +/* Disable or mask interrupts during a kernel kexec */ +extern void machine_kexec_mask_interrupts(void); + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); diff --git a/include/linux/kref.h b/include/linux/kref.h index d32e21a2538c..88e82ab1367c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -46,18 +46,18 @@ static inline void kref_get(struct kref *kref) } /** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the + * kref_put - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. + * Decrement the refcount, and if 0, call @release. The caller may not + * pass NULL or kfree() as the release function. + * + * Return: 1 if this call removed the object, otherwise return 0. Beware, + * if this function returns 0, another caller may have removed the object + * by the time this function returns. The return value is only certain + * if you want to see if the object is definitely released. */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { @@ -68,17 +68,37 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return 0; } +/** + * kref_put_mutex - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @mutex: Mutex which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @mutex + * held. The @release function will release the mutex. + */ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), - struct mutex *lock) + struct mutex *mutex) { - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) { release(kref); return 1; } return 0; } +/** + * kref_put_lock - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @lock: Spinlock which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @lock + * held. The @release function will release the lock. + */ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) @@ -94,8 +114,6 @@ static inline int kref_put_lock(struct kref *kref, * kref_get_unless_zero - Increment refcount for object unless it is zero. * @kref: object. * - * Return non-zero if the increment succeeded. Otherwise return 0. - * * This function is intended to simplify locking around refcounting for * objects that can be looked up from a lookup structure, and which are * removed from that lookup structure in the object destructor. @@ -105,6 +123,8 @@ static inline int kref_put_lock(struct kref *kref, * With a lookup followed by a kref_get_unless_zero *with return value check* * locking in the kref_put path can be deferred to the actual removal from * the lookup structure and RCU lookups become trivial. + * + * Return: non-zero if the increment succeeded. Otherwise return 0. */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { diff --git a/include/linux/kthread.h b/include/linux/kthread.h index b11f53c1ba2e..8d27403888ce 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data, void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); +int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask); int kthread_stop(struct task_struct *k); int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); @@ -186,13 +187,58 @@ extern void __kthread_init_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); -__printf(2, 3) -struct kthread_worker * -kthread_create_worker(unsigned int flags, const char namefmt[], ...); +__printf(3, 4) +struct kthread_worker *kthread_create_worker_on_node(unsigned int flags, + int node, + const char namefmt[], ...); + +#define kthread_create_worker(flags, namefmt, ...) \ + kthread_create_worker_on_node(flags, NUMA_NO_NODE, namefmt, ## __VA_ARGS__); + +/** + * kthread_run_worker - create and wake a kthread worker. + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create_worker() followed by + * wake_up_process(). Returns the kthread_worker or ERR_PTR(-ENOMEM). + */ +#define kthread_run_worker(flags, namefmt, ...) \ +({ \ + struct kthread_worker *__kw \ + = kthread_create_worker(flags, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__kw)) \ + wake_up_process(__kw->task); \ + __kw; \ +}) -__printf(3, 4) struct kthread_worker * +struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, - const char namefmt[], ...); + const char namefmt[]); + +/** + * kthread_run_worker_on_cpu - create and wake a cpu bound kthread worker. + * @cpu: CPU number + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: Convenient wrapper for kthread_create_worker_on_cpu() + * followed by wake_up_process(). Returns the kthread_worker or + * ERR_PTR(-ENOMEM). + */ +static inline struct kthread_worker * +kthread_run_worker_on_cpu(int cpu, unsigned int flags, + const char namefmt[]) +{ + struct kthread_worker *kw; + + kw = kthread_create_worker_on_cpu(cpu, flags, namefmt); + if (!IS_ERR(kw)) + wake_up_process(kw->task); + + return kw; +} bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); diff --git a/include/linux/libata.h b/include/linux/libata.h index c1a85d46eba6..be5183d75736 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1467,13 +1467,13 @@ extern const struct attribute_group *ata_common_sdev_groups[]; #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .can_queue = ATA_DEF_QUEUE, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ + .tag_alloc_policy_rr = true, \ .device_configure = ata_scsi_device_configure #define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ __ATA_BASE_SHT(drv_name), \ .can_queue = drv_qd, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ + .tag_alloc_policy_rr = true, \ .device_configure = ata_scsi_device_configure #define ATA_BASE_SHT(drv_name) \ diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h index fc388da6a027..0d68f9d6a6a7 100644 --- a/include/linux/libgcc.h +++ b/include/linux/libgcc.h @@ -34,4 +34,8 @@ long long notrace __lshrdi3(long long u, word_type b); long long notrace __muldi3(long long u, long long v); word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b); +#ifdef CONFIG_HAVE_ARCH_LIBGCC_H +#include <asm/libgcc.h> +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c3a1f78bc884..c39f119659ba 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -34,14 +34,24 @@ struct lockref { }; }; -extern void lockref_get(struct lockref *); -extern int lockref_put_return(struct lockref *); -extern int lockref_get_not_zero(struct lockref *); -extern int lockref_put_not_zero(struct lockref *); -extern int lockref_put_or_lock(struct lockref *); - -extern void lockref_mark_dead(struct lockref *); -extern int lockref_get_not_dead(struct lockref *); +/** + * lockref_init - Initialize a lockref + * @lockref: pointer to lockref structure + * @count: initial count + */ +static inline void lockref_init(struct lockref *lockref, unsigned int count) +{ + spin_lock_init(&lockref->lock); + lockref->count = count; +} + +void lockref_get(struct lockref *lockref); +int lockref_put_return(struct lockref *lockref); +bool lockref_get_not_zero(struct lockref *lockref); +bool lockref_put_or_lock(struct lockref *lockref); + +void lockref_mark_dead(struct lockref *lockref); +bool lockref_get_not_dead(struct lockref *lockref); /* Must be called under spinlock for reliable results */ static inline bool __lockref_is_dead(const struct lockref *l) diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 97a8b21eb033..e13d2f947b51 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -77,6 +77,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_LOCKDOWN 15 #define LSM_AUDIT_DATA_NOTIFICATION 16 #define LSM_AUDIT_DATA_ANONINODE 17 +#define LSM_AUDIT_DATA_NLMSGTYPE 18 union { struct path path; struct dentry *dentry; @@ -98,6 +99,7 @@ struct common_audit_data { struct lsm_ibendport_audit *ibendport; int reason; const char *anonclass; + u16 nlmsg_type; } u; /* this union contains LSM specific data */ union { @@ -116,14 +118,28 @@ struct common_audit_data { #define v4info fam.v4 #define v6info fam.v6 +#ifdef CONFIG_AUDIT + int ipv4_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#if IS_ENABLED(CONFIG_IPV6) int ipv6_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#endif /* IS_ENABLED(CONFIG_IPV6) */ void common_lsm_audit(struct common_audit_data *a, void (*pre_audit)(struct audit_buffer *, void *), void (*post_audit)(struct audit_buffer *, void *)); +#else /* CONFIG_AUDIT */ + +static inline void common_lsm_audit(struct common_audit_data *a, + void (*pre_audit)(struct audit_buffer *, void *), + void (*post_audit)(struct audit_buffer *, void *)) +{ +} + +#endif /* CONFIG_AUDIT */ + #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eb2937599cb0..e2f1ce37c41e 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -83,7 +83,7 @@ LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, u32 *ctxlen) + struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -295,17 +295,16 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, - u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp) LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, - u32 *ctxlen) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, + struct lsm_context *cp) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index e781727c8916..6325f6ffb895 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -15,8 +15,8 @@ */ #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ struct _name { \ - int nr; \ - int size; \ + size_t nr; \ + size_t size; \ _type *data; \ _type preallocated[_nr]; \ } diff --git a/include/linux/mm.h b/include/linux/mm.h index b1c3db9cf355..f02925447e59 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -711,7 +711,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) return false; if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) @@ -728,7 +728,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { up_read(&vma->vm_lock->lock); return false; } @@ -743,7 +743,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) } /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -751,7 +751,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) * current task is holding mmap_write_lock, both vma->vm_lock_seq and * mm->mm_lock_seq can't be concurrently modified. */ - *mm_lock_seq = vma->vm_mm->mm_lock_seq; + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; return (vma->vm_lock_seq == *mm_lock_seq); } @@ -762,7 +762,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) */ static inline void vma_start_write(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; if (__is_vma_write_locked(vma, &mm_lock_seq)) return; @@ -780,7 +780,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) static inline void vma_assert_write_locked(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 332cee285662..825c04b56403 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -727,7 +727,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif @@ -921,6 +921,9 @@ struct mm_struct { * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. + * Incremented every time mmap_lock is write-locked/unlocked. + * Initialized to 0, therefore odd values indicate mmap_lock + * is write-locked and even values that it's released. * * Can be modified under write mmap_lock using RELEASE * semantics. @@ -929,7 +932,7 @@ struct mm_struct { * Can be read with ACQUIRE semantics if not holding write * mmap_lock. */ - int mm_lock_seq; + seqcount_t mm_lock_seq; #endif diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index de9dc20b01ba..45a21faa3ff6 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -71,39 +71,68 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK -/* - * Drop all currently-held per-VMA locks. - * This is called from the mmap_lock implementation directly before releasing - * a write-locked mmap_lock (or downgrading it to read-locked). - * This should normally NOT be called manually from other places. - * If you want to call this manually anyway, keep in mind that this will release - * *all* VMA write locks, including ones from further up the stack. - */ -static inline void vma_end_write_all(struct mm_struct *mm) + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) +{ + seqcount_init(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) +{ + do_raw_write_seqcount_begin(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_end(struct mm_struct *mm) +{ + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); + do_raw_write_seqcount_end(&mm->mm_lock_seq); +} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) { - mmap_assert_write_locked(mm); /* - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive - * mmap_lock being held. - * We need RELEASE semantics here to ensure that preceding stores into - * the VMA take effect before we unlock it with this store. - * Pairs with ACQUIRE semantics in vma_start_read(). + * Since mmap_lock is a sleeping lock, and waiting for it to become + * unlocked is more or less equivalent with taking it ourselves, don't + * bother with the speculative path if mmap_lock is already write-locked + * and take the slow path, which takes the lock. */ - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); + return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq); +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return read_seqcount_retry(&mm->mm_lock_seq, seq); } -#else -static inline void vma_end_write_all(struct mm_struct *mm) {} -#endif + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) +{ + return false; +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return true; +} + +#endif /* CONFIG_PER_VMA_LOCK */ static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); + mm_lock_seqcount_init(mm); } static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -111,6 +140,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -120,10 +150,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) __mmap_lock_trace_start_locking(mm, true); ret = down_write_killable(&mm->mmap_lock); + if (!ret) + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, ret == 0); return ret; } +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. + */ +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + mm_lock_seqcount_end(mm); +} + static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index bbaec80c78c5..ac01dc4eb2ce 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -24,6 +24,7 @@ static inline void leave_mm(void) { } #ifndef task_cpu_possible_mask # define task_cpu_possible_mask(p) cpu_possible_mask # define task_cpu_possible(cpu, p) true +# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK) #else # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) #endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4338b1b4ac44..d67614f7b7f1 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -700,6 +700,8 @@ struct x86_cpu_id { #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 #define X86_STEPPING_ANY 0 +#define X86_STEP_MIN 0 +#define X86_STEP_MAX 0xf #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ /* diff --git a/include/linux/mount.h b/include/linux/mount.h index 04213d8ef837..dcc17ce8a959 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -75,7 +75,7 @@ struct vfsmount { static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_idmap); + return READ_ONCE(mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); @@ -113,7 +113,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern long do_mount(const char *, const char __user *, +int do_mount(const char *, const char __user *, const char *, unsigned long, void *); extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ecdd5ced16a8..071d05d81d38 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -18,9 +18,11 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/uio.h> +#include <linux/rolling_buffer.h> enum netfs_sreq_ref_trace; typedef struct mempool_s mempool_t; +struct folio_queue; /** * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] @@ -71,6 +73,7 @@ struct netfs_inode { #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ +#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */ }; /* @@ -178,9 +181,6 @@ struct netfs_io_subrequest { unsigned long long start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ - size_t consumed; /* Amount of read data consumed */ - size_t prev_donated; /* Amount of data donated from previous subreq */ - size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ @@ -188,9 +188,6 @@ struct netfs_io_subrequest { u8 retry_count; /* The number of retries (0 on initial pass) */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ - unsigned char curr_folioq_slot; /* Folio currently being read */ - unsigned char curr_folio_order; /* Order of folio */ - struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ @@ -208,9 +205,11 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ + NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_WRITE, /* This is a direct I/O write */ @@ -231,16 +230,16 @@ struct netfs_io_request { struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ - struct list_head subrequests; /* Contributory I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ - struct folio_queue *buffer; /* Head of I/O buffer */ - struct folio_queue *buffer_tail; /* Tail of I/O buffer */ - struct iov_iter iter; /* Unencrypted-side iterator */ - struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ + struct rolling_buffer buffer; /* Unencrypted buffer */ +#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1 +#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2 + wait_queue_head_t waitq; /* Processor waiter */ void *netfs_priv; /* Private data for the netfs */ void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ @@ -251,28 +250,28 @@ struct netfs_io_request { atomic_t subreq_counter; /* Next subreq->debug_index */ unsigned int nr_group_rel; /* Number of refs to release on ->group */ spinlock_t lock; /* Lock for queuing subreqs */ - atomic_t nr_outstanding; /* Number of ops in progress */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ long error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ - u8 buffer_head_slot; /* First slot in ->buffer */ - u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ + unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ - size_t prev_donated; /* Fallback for subreq->prev_donated */ + unsigned char front_folio_order; /* Order (size) of front folio */ refcount_t ref; unsigned long flags; +#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ @@ -409,6 +408,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * struct netfs_group *netfs_group); ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); +/* Single, monolithic object read/write API. */ +void netfs_single_mark_inode_dirty(struct inode *inode); +ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter); +int netfs_writeback_single(struct address_space *mapping, + struct writeback_control *wbc, + struct iov_iter *iter); + /* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); @@ -428,10 +434,8 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, - bool was_async); -void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, - int error, bool was_async); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, @@ -453,6 +457,18 @@ void netfs_end_io_write(struct inode *inode); int netfs_start_io_direct(struct inode *inode); void netfs_end_io_direct(struct inode *inode); +/* Miscellaneous APIs. */ +struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp, + unsigned int trace /*enum netfs_folioq_trace*/); +void netfs_folioq_free(struct folio_queue *folioq, + unsigned int trace /*enum netfs_trace_folioq*/); + +/* Buffer wrangling helpers API. */ +int netfs_alloc_folioq_buffer(struct address_space *mapping, + struct folio_queue **_buffer, + size_t *_cur_size, ssize_t size, gfp_t gfp); +void netfs_free_folioq_buffer(struct folio_queue *fq); + /** * netfs_inode - Get the netfs inode context from the inode * @inode: The inode to query diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 13377dde4527..fe3b60818fdc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -64,6 +64,7 @@ enum { /* Transport Type codes for Discovery Log Page entry TRTYPE field */ enum { + NVMF_TRTYPE_PCI = 0, /* PCI */ NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ NVMF_TRTYPE_TCP = 3, /* TCP/IP */ @@ -275,6 +276,7 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), + NVME_CTRL_ATTR_RHII = (1 << 18), }; struct nvme_id_ctrl { @@ -1896,6 +1898,46 @@ static inline bool nvme_is_fabrics(const struct nvme_command *cmd) return cmd->common.opcode == nvme_fabrics_command; } +#ifdef CONFIG_NVME_VERBOSE_ERRORS +const char *nvme_get_error_status_str(u16 status); +const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); +#else /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const char *nvme_get_error_status_str(u16 status) +{ + return "I/O Error"; +} +static inline const char *nvme_get_opcode_str(u8 opcode) +{ + return "I/O Cmd"; +} +static inline const char *nvme_get_admin_opcode_str(u8 opcode) +{ + return "Admin Cmd"; +} + +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) +{ + return "Fabrics Cmd"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ + +static inline const char *nvme_opcode_str(int qid, u8 opcode) +{ + return qid ? nvme_get_opcode_str(opcode) : + nvme_get_admin_opcode_str(opcode); +} + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b3b8d3dab52d..c722a921165b 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -45,29 +45,25 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #endif -#define ANNOTATE_NOENDBR \ - "986: \n\t" \ - ".pushsection .discard.noendbr\n\t" \ - ".long 986b\n\t" \ - ".popsection\n\t" - #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long 998b\n\t" \ ".popsection\n\t" -#else /* __ASSEMBLY__ */ +#define __ASM_BREF(label) label ## b -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; +#define __ASM_ANNOTATE(label, type) \ + ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ + ".long " __stringify(label) " - .\n\t" \ + ".long " __stringify(type) "\n\t" \ + ".popsection\n\t" + +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + __ASM_ANNOTATE(911b, type) + +#else /* __ASSEMBLY__ */ /* * In asm, there are two kinds of code: normal C-type callable functions and @@ -115,34 +111,11 @@ #endif .endm -.macro ANNOTATE_NOENDBR +.macro ANNOTATE type:req .Lhere_\@: - .pushsection .discard.noendbr - .long .Lhere_\@ - .popsection -.endm - -/* - * Use objtool to validate the entry requirement that all code paths do - * VALIDATE_UNRET_END before RET. - * - * NOTE: The macro must be used at the beginning of a global symbol, otherwise - * it will be ignored. - */ -.macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) -.Lhere_\@: - .pushsection .discard.validate_unret + .pushsection .discard.annotate_insn,"M",@progbits,8 .long .Lhere_\@ - . - .popsection -#endif -.endm - -.macro REACHABLE -.Lhere_\@: - .pushsection .discard.reachable - .long .Lhere_\@ + .long \type .popsection .endm @@ -155,20 +128,77 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) -#define ANNOTATE_NOENDBR -#define ASM_REACHABLE +#define __ASM_ANNOTATE(label, type) +#define ASM_ANNOTATE(type) #else -#define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro ANNOTATE_NOENDBR -.endm -.macro REACHABLE +.macro ANNOTATE type:req .endm #endif #endif /* CONFIG_OBJTOOL */ +#ifndef __ASSEMBLY__ +/* + * Annotate away the various 'relocation to !ENDBR` complaints; knowing that + * these relocations will never be used for indirect calls. + */ +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) +/* + * See linux/instrumentation.h + */ +#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) +/* + * Use objtool to validate the entry requirement that all code paths do + * VALIDATE_UNRET_END before RET. + * + * NOTE: The macro must be used at the beginning of a global symbol, otherwise + * it will be ignored. + */ +#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) +/* + * This should be used to refer to an instruction that is considered + * terminating, like a noreturn CALL or UD2 when we know they are not -- eg + * WARN using UD2. + */ +#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) + +#else +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE +/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ +/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL +#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#endif + +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) +#define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN +#else +#define VALIDATE_UNRET_BEGIN +#endif + #endif /* _LINUX_OBJTOOL_H */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 453a4f4ef39d..df5d9fa84dba 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -54,4 +54,16 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 79dbd8bc35a7..46406f3fe34d 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -96,7 +96,7 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = usage; } -#ifdef CONFIG_MEMCG +#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM) void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cb99ec8c9e96..8333f132f4a9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1279,6 +1279,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, { int size = 1; + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) + return; + data->callchain = perf_callchain(event, regs); size += data->callchain->nr; @@ -1287,12 +1292,18 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, } static inline void perf_sample_save_raw_data(struct perf_sample_data *data, + struct perf_event *event, struct perf_raw_record *raw) { struct perf_raw_frag *frag = &raw->frag; u32 sum = 0; int size; + if (!(event->attr.sample_type & PERF_SAMPLE_RAW)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW)) + return; + do { sum += frag->size; if (perf_raw_frag_last(frag)) @@ -1309,6 +1320,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_RAW; } +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, struct perf_branch_stack *brs, @@ -1316,6 +1332,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, { int size = sizeof(u64); /* nr */ + if (!has_branch_stack(event)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK)) + return; + if (branch_sample_hw_index(event)) size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); @@ -1669,6 +1690,8 @@ static inline int perf_allow_tracepoint(struct perf_event_attr *attr) return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } +extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs); + extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, @@ -1705,11 +1728,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) # define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #endif -static inline bool has_branch_stack(struct perf_event *event) -{ - return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; -} - static inline bool needs_branch_stack(struct perf_event *event) { return event->attr.branch_sample_type != 0; @@ -1879,6 +1897,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) +{ + return 0; +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..98837a1ff0f3 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -59,6 +59,7 @@ struct pid spinlock_t lock; struct dentry *stashed; u64 ino; + struct rb_node pidfs_node; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -68,6 +69,7 @@ struct pid struct upid numbers[]; }; +extern seqcount_spinlock_t pidmap_lock_seq; extern struct pid init_struct_pid; struct file; @@ -106,9 +108,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..7c67a5811199 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,7 @@ struct pid_namespace { struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; + int pid_max; struct pid_namespace *parent; #ifdef CONFIG_BSD_PROCESS_ACCT struct fs_pin *bacct; @@ -38,9 +39,14 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#if defined(CONFIG_MEMFD_CREATE) int memfd_noexec_scope; #endif +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; @@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +int register_pidns_sysctls(struct pid_namespace *pidns); +void unregister_pidns_sysctls(struct pid_namespace *pidns); static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) { diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..7c830d0dec9a 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,5 +4,8 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); +void pidfs_add_pid(struct pid *pid); +void pidfs_remove_pid(struct pid *pid); +extern const struct dentry_operations pidfs_dentry_operations; #endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index b34ed0cc1f8d..3ec24f445c29 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -42,6 +42,11 @@ #define EC_MAX_RESPONSE_OVERHEAD 32 /* + * ACPI notify value for MKBP host event. + */ +#define ACPI_NOTIFY_CROS_EC_MKBP 0x80 + +/* * EC panic is not covered by the standard (0-F) ACPI notify values. * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF * device specific ACPI notify range. @@ -246,6 +251,8 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +int cros_ec_rwsig_continue(struct cros_ec_device *ec_dev); + int cros_ec_query_all(struct cros_ec_device *ec_dev); int cros_ec_get_next_event(struct cros_ec_device *ec_dev, diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 568183e3e641..414146abfe81 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,8 @@ struct dev_pm_opp_data { struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); +unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index); + unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies); @@ -205,6 +207,11 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device * static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} +static inline unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index) +{ + return 0; +} + static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { return 0; diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index c9a31c567e85..2e18fef1a2e1 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -144,32 +144,32 @@ static inline int pruss_release_mem_region(struct pruss *pruss, static inline int pruss_cfg_get_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 *mux) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_gpimode(struct pruss *pruss, enum pruss_pru_id pru_id, enum pruss_gpi_mode mode) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_xfr_enable(struct pruss *pruss, enum pru_type pru_type, - bool enable); + bool enable) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } #endif /* CONFIG_TI_PRUSS */ diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 730f77381d55..2503f7625d65 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,6 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; + const struct export_operations *eops; const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7c173aa64e1e..8d2ba3749866 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -211,6 +211,43 @@ rb_add(struct rb_node *node, struct rb_root *tree, } /** + * rb_find_add_cached() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree, + int (*cmp)(const struct rb_node *new, const struct rb_node *exist)) +{ + bool leftmost = true; + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) { + link = &parent->rb_left; + } else if (c > 0) { + link = &parent->rb_right; + leftmost = false; + } else { + return parent; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); + return NULL; +} + +/** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 14dfa6008467..1b11926ddd47 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) +/* + * Return the ->prev pointer of a list_head in an rcu safe way. Don't + * access it directly. + * + * Any list traversed with list_bidir_prev_rcu() must never use + * list_del_rcu(). Doing so will poison the ->prev pointer that + * list_bidir_prev_rcu() relies on, which will result in segfaults. + * To prevent these segfaults, use list_bidir_del_rcu() instead + * of list_del_rcu(). + */ +#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) /** * list_tail_rcu - returns the prev pointer of the head of the list @@ -159,6 +170,39 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * list_bidir_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * In contrast to list_del_rcu() doesn't poison the prev pointer thus + * allowing backwards traversal via list_bidir_prev_rcu(). + * + * Note: list_empty() on entry does not return true after this because + * the entry is in a special undefined state that permits RCU-based + * lockfree reverse traversal. In particular this means that we can not + * poison the forward and backwards pointers that may still be used for + * walking the list. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another list-mutation + * primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on + * this same list. However, it is perfectly legal to run concurrently + * with the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that list_del_rcu() and list_bidir_del_rcu() must not be used on + * the same list. + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_bidir_del_rcu(struct list_head *entry) +{ + __list_del_entry(entry); +} + +/** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 303ab9bee155..f9bed3d3f78d 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -65,4 +65,15 @@ static inline void cond_resched_rcu(void) #endif } +// Has the current task blocked within its current RCU read-side +// critical section? +static inline bool has_rcu_reader_blocked(void) +{ +#ifdef CONFIG_PREEMPT_RCU + return !list_empty(¤t->rcu_node_entry); +#else + return false; +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rolling_buffer.h b/include/linux/rolling_buffer.h new file mode 100644 index 000000000000..ac15b1ffdd83 --- /dev/null +++ b/include/linux/rolling_buffer.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Rolling buffer of folios + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _ROLLING_BUFFER_H +#define _ROLLING_BUFFER_H + +#include <linux/folio_queue.h> +#include <linux/uio.h> + +/* + * Rolling buffer. Whilst the buffer is live and in use, folios and folio + * queue segments can be added to one end by one thread and removed from the + * other end by another thread. The buffer isn't allowed to be empty; it must + * always have at least one folio_queue in it so that neither side has to + * modify both queue pointers. + * + * The iterator in the buffer is extended as buffers are inserted. It can be + * snapshotted to use a segment of the buffer. + */ +struct rolling_buffer { + struct folio_queue *head; /* Producer's insertion point */ + struct folio_queue *tail; /* Consumer's removal point */ + struct iov_iter iter; /* Iterator tracking what's left in the buffer */ + u8 next_head_slot; /* Next slot in ->head */ + u8 first_tail_slot; /* First slot in ->tail */ +}; + +/* + * Snapshot of a rolling buffer. + */ +struct rolling_buffer_snapshot { + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ + unsigned char curr_slot; /* Folio currently being read */ + unsigned char curr_order; /* Order of folio */ +}; + +/* Marks to store per-folio in the internal folio_queue structs. */ +#define ROLLBUF_MARK_1 BIT(0) +#define ROLLBUF_MARK_2 BIT(1) + +int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id, + unsigned int direction); +int rolling_buffer_make_space(struct rolling_buffer *roll); +ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll, + struct readahead_control *ractl, + struct folio_batch *put_batch); +ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio, + unsigned int flags); +struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll); +void rolling_buffer_clear(struct rolling_buffer *roll); + +static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount) +{ + iov_iter_advance(&roll->iter, amount); +} + +#endif /* _ROLLING_BUFFER_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 64934e0830af..ac08431e238f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -944,6 +944,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; + unsigned sched_task_hot:1; /* Force alignment to the next boundary: */ unsigned :0; @@ -1374,6 +1375,15 @@ struct task_struct { * with respect to preemption. */ unsigned long rseq_event_mask; +# ifdef CONFIG_DEBUG_RSEQ + /* + * This is a place holder to save a copy of the rseq fields for + * validation of read-only fields. The struct rseq has a + * variable-length array at the end, so it cannot be used + * directly. Reserve a size large enough for the known fields. + */ + char rseq_fields[sizeof(struct rseq)]; +# endif #endif #ifdef CONFIG_SCHED_MM_CID @@ -1944,11 +1954,10 @@ static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); - -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} +#define set_task_comm(tsk, from) ({ \ + BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ + __set_task_comm(tsk, from, false); \ +}) /* * - Why not use task_lock()? diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 2b461129d1fa..d8501f4709b5 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,16 +7,21 @@ #include <linux/tick.h> enum hk_type { - HK_TYPE_TIMER, - HK_TYPE_RCU, - HK_TYPE_MISC, - HK_TYPE_SCHED, - HK_TYPE_TICK, HK_TYPE_DOMAIN, - HK_TYPE_WQ, HK_TYPE_MANAGED_IRQ, - HK_TYPE_KTHREAD, - HK_TYPE_MAX + HK_TYPE_KERNEL_NOISE, + HK_TYPE_MAX, + + /* + * The following housekeeping types are only set by the nohz_full + * boot commandline option. So they can share the same value. + */ + HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE, + HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE, + HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, + HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, + HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, + HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4237daa5ac7a..7f3dbafe1817 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -114,7 +114,10 @@ struct sched_domain { unsigned int lb_count[CPU_MAX_IDLE_TYPES]; unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES]; unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; @@ -140,9 +143,7 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif union { void *private; /* used during construction */ struct rcu_head rcu; /* used during destruction */ @@ -198,18 +199,12 @@ struct sched_domain_topology_level { int flags; int numa_level; struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); -#ifdef CONFIG_SCHED_DEBUG # define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif #else /* CONFIG_SMP */ diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 06cd8fb2f409..0f28b4623ad4 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -63,4 +63,38 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); +/* Spin unlock helpers to unlock and call wake_up_q with preempt disabled */ +static inline +void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irq(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irqrestore_wake(raw_spinlock_t *lock, unsigned long flags, + struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irqrestore(lock, flags); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/include/linux/security.h b/include/linux/security.h index cbdba435b798..980b6c207cad 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -226,6 +226,18 @@ extern unsigned long dac_mmap_min_addr; #endif /* + * A "security context" is the text representation of + * the information used by LSMs. + * This structure contains the string, its length, and which LSM + * it is useful for. + */ +struct lsm_context { + char *context; /* Provided by the module */ + u32 len; + int id; /* Identifies the module */ +}; + +/* * Values used in the task_security_ops calls */ /* setuid or setgid, id0 == uid or gid */ @@ -378,8 +390,8 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen); + const char **xattr_name, + struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -553,14 +565,14 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name, int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); -int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); +int security_secid_to_secctx(u32 secid, struct lsm_context *cp); +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); -void security_release_secctx(char *secdata, u32 seclen); +void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); -int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); +int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); @@ -852,8 +864,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, - u32 *ctxlen) + struct lsm_context *lsmcxt) { return -EOPNOTSUPP; } @@ -1526,14 +1537,13 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, - u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp) { return -EOPNOTSUPP; } static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) { return -EOPNOTSUPP; } @@ -1545,7 +1555,7 @@ static inline int security_secctx_to_secid(const char *secdata, return -EOPNOTSUPP; } -static inline void security_release_secctx(char *secdata, u32 seclen) +static inline void security_release_secctx(struct lsm_context *cp) { } @@ -1561,7 +1571,8 @@ static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 { return -EOPNOTSUPP; } -static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static inline int security_inode_getsecctx(struct inode *inode, + struct lsm_context *cp) { return -EOPNOTSUPP; } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5298765d6ca4..d1a2346cf0f8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq; \ \ - while ((__seq = seqprop_sequence(s)) & 1) \ + while (unlikely((__seq = seqprop_sequence(s)) & 1)) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -319,6 +319,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) }) /** + * raw_seqcount_try_begin() - begin a seqcount_t read critical section + * w/o lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Similar to raw_seqcount_begin(), except it enables eliding the critical + * section entirely if odd, instead of doing the speculation knowing it will + * fail. + * + * Useful when counter stabilization is more or less equivalent to taking + * the lock and there is a slowpath that does that. + * + * If true, start will be set to the (even) sequence count read. + * + * Return: true when a read critical section is started. + */ +#define raw_seqcount_try_begin(s, start) \ +({ \ + start = raw_read_seqcount(s); \ + !(start & 1); \ +}) + +/** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants diff --git a/include/linux/slab.h b/include/linux/slab.h index 10a971c2bde3..09eedaecf120 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1099,5 +1099,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s); size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); +void __init kvfree_rcu_init(void); #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..d7ba46e74f58 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,6 +43,12 @@ int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above. + #ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) @@ -232,13 +238,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * The return value from srcu_read_lock() must be passed unaltered - * to the matching srcu_read_unlock(). Note that srcu_read_lock() and - * the matching srcu_read_unlock() must occur in the same context, for - * example, it is illegal to invoke srcu_read_unlock() in an irq handler - * if the matching srcu_read_lock() was invoked in process context. Or, - * for that matter to invoke srcu_read_unlock() from one task and the - * matching srcu_read_lock() from another. + * The return value from srcu_read_lock() is guaranteed to be + * non-negative. This value must be passed unaltered to the matching + * srcu_read_unlock(). Note that srcu_read_lock() and the matching + * srcu_read_unlock() must occur in the same context, for example, it is + * illegal to invoke srcu_read_unlock() in an irq handler if the matching + * srcu_read_lock() was invoked in process context. Or, for that matter to + * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() + * from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..b17814c9d1c7 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -26,6 +26,7 @@ struct srcu_data { atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ + /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,11 +44,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -/* Values for ->srcu_reader_flavor. */ -#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). -#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). - /* * Node in SRCU combining tree, similar in function to rcu_data. */ @@ -258,7 +254,7 @@ static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) return; - // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. + // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); } diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c..9d8382e23a9c 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,7 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u32 dio_read_offset_align; u64 change_cookie; u64 subvol; u32 atomic_write_unit_min; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0e035f675efe..542773650200 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -264,18 +264,6 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /** - * struct ktime_timestamps - Simultaneous mono/boot/real timestamps - * @mono: Monotonic timestamp - * @boot: Boottime timestamp - * @real: Realtime timestamp - */ -struct ktime_timestamps { - u64 mono; - u64 boot; - u64 real; -}; - -/** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value * @cycles: Clocksource counter value to produce the system times @@ -345,9 +333,6 @@ extern int get_device_system_crosststamp( */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); -/* NMI safe mono/boot/realtime timestamps */ -extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); - /* * Persistent clock related interfaces */ diff --git a/include/linux/torture.h b/include/linux/torture.h index c2e979f82f8d..0134e7221cae 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -130,7 +130,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #endif #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); #endif #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index dd4b31ce6d5d..d4cdc089f6c3 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -320,7 +320,7 @@ struct serial_struct; * * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` * - * kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is + * kgdboc support (Documentation/process/debugging/kgdb.rst). This routine is * called to initialize the HW for later use by calling @poll_get_char or * @poll_put_char. * diff --git a/include/linux/uio.h b/include/linux/uio.h index 853f9de5aa05..8ada84e85447 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -82,6 +82,15 @@ struct iov_iter { }; }; +typedef __u16 uio_meta_flags_t; + +struct uio_meta { + uio_meta_flags_t flags; + u16 app_tag; + u64 seed; + struct iov_iter iter; +}; + static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e0a4c2082245..b1df7d792fa1 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/timer.h> +#include <linux/seqlock.h> struct uprobe; struct vm_area_struct; @@ -124,6 +125,10 @@ struct uprobe_task { unsigned int depth; struct return_instance *return_instances; + struct return_instance *ri_pool; + struct timer_list ri_timer; + seqcount_t ri_seqcount; + union { struct { struct arch_uprobe_task autask; @@ -137,7 +142,6 @@ struct uprobe_task { }; struct uprobe *active_uprobe; - struct timer_list ri_timer; unsigned long xol_vaddr; struct arch_uprobe *auprobe; @@ -154,12 +158,18 @@ struct return_instance { unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ - int consumers_cnt; + int cons_cnt; /* total number of session consumers */ struct return_instance *next; /* keep as stack */ struct rcu_head rcu; - struct return_consumer consumers[] __counted_by(consumers_cnt); + /* singular pre-allocated return_consumer instance for common case */ + struct return_consumer consumer; + /* + * extra return_consumer instances for rare cases of multiple session consumers, + * contains (cons_cnt - 1) elements + */ + struct return_consumer *extra_consumers; } ____cacheline_aligned; enum rp_check { diff --git a/include/net/scm.h b/include/net/scm.h index 0d35c7c77a74..22bb49589fde 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -105,16 +105,16 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, #ifdef CONFIG_SECURITY_NETWORK static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { - char *secdata; - u32 seclen; + struct lsm_context ctx; int err; if (test_bit(SOCK_PASSSEC, &sock->flags)) { - err = security_secid_to_secctx(scm->secid, &secdata, &seclen); + err = security_secid_to_secctx(scm->secid, &ctx); - if (!err) { - put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata); - security_release_secctx(secdata, seclen); + if (err >= 0) { + put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, + ctx.context); + security_release_secctx(&ctx); } } } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 2b4ab0369ffb..02823d6af37d 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -438,8 +438,10 @@ struct scsi_host_template { */ short cmd_per_lun; - /* If use block layer to manage tags, this is tag allocation policy */ - int tag_alloc_policy; + /* + * Allocate tags starting from last allocated tag. + */ + bool tag_alloc_policy_rr : 1; /* * Track QUEUE_FULL events and reduce queue depth on demand. diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h index 5e1a9eafd10f..b220072cfa1b 100644 --- a/include/sound/hdmi-codec.h +++ b/include/sound/hdmi-codec.h @@ -105,7 +105,8 @@ struct hdmi_codec_ops { * Optional */ int (*get_dai_id)(struct snd_soc_component *comment, - struct device_node *endpoint); + struct device_node *endpoint, + void *data); /* * Hook callback function to handle connector plug event. @@ -114,9 +115,6 @@ struct hdmi_codec_ops { int (*hook_plugged_cb)(struct device *dev, void *data, hdmi_codec_plugged_cb fn, struct device *codec_dev); - - /* bit field */ - unsigned int no_capture_mute:1; }; /* HDMI codec initalization data */ @@ -128,6 +126,7 @@ struct hdmi_codec_pdata { uint spdif:1; uint no_spdif_playback:1; uint no_spdif_capture:1; + uint no_capture_mute:1; int max_i2s_channels; void *data; }; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index a0aed1a428a1..b0db89058c91 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -118,6 +118,8 @@ enum yfs_cm_operation { */ #define afs_call_traces \ EM(afs_call_trace_alloc, "ALLOC") \ + EM(afs_call_trace_async_abort, "ASYAB") \ + EM(afs_call_trace_async_kill, "ASYKL") \ EM(afs_call_trace_free, "FREE ") \ EM(afs_call_trace_get, "GET ") \ EM(afs_call_trace_put, "PUT ") \ @@ -168,12 +170,14 @@ enum yfs_cm_operation { #define afs_cell_traces \ EM(afs_cell_trace_alloc, "ALLOC ") \ EM(afs_cell_trace_free, "FREE ") \ + EM(afs_cell_trace_get_atcell, "GET atcell") \ EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \ EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \ EM(afs_cell_trace_get_queue_new, "GET q-new ") \ EM(afs_cell_trace_get_vol, "GET vol ") \ EM(afs_cell_trace_insert, "INSERT ") \ EM(afs_cell_trace_manage, "MANAGE ") \ + EM(afs_cell_trace_put_atcell, "PUT atcell") \ EM(afs_cell_trace_put_candidate, "PUT candid") \ EM(afs_cell_trace_put_destroy, "PUT destry") \ EM(afs_cell_trace_put_queue_work, "PUT q-work") \ @@ -323,6 +327,44 @@ enum yfs_cm_operation { EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \ E_(yfs_CB_CallBack, "YFSCB.CallBack") +#define afs_cb_promise_traces \ + EM(afs_cb_promise_clear_cb_break, "CLEAR cb-break") \ + EM(afs_cb_promise_clear_rmdir, "CLEAR rmdir") \ + EM(afs_cb_promise_clear_rotate_server, "CLEAR rot-srv") \ + EM(afs_cb_promise_clear_server_change, "CLEAR srv-chg") \ + EM(afs_cb_promise_clear_vol_init_cb, "CLEAR vol-init-cb") \ + EM(afs_cb_promise_set_apply_cb, "SET apply-cb") \ + EM(afs_cb_promise_set_new_inode, "SET new-inode") \ + E_(afs_cb_promise_set_new_symlink, "SET new-symlink") + +#define afs_vnode_invalid_traces \ + EM(afs_vnode_invalid_trace_cb_ro_snapshot, "cb-ro-snapshot") \ + EM(afs_vnode_invalid_trace_cb_scrub, "cb-scrub") \ + EM(afs_vnode_invalid_trace_cb_v_break, "cb-v-break") \ + EM(afs_vnode_invalid_trace_expired, "expired") \ + EM(afs_vnode_invalid_trace_no_cb_promise, "no-cb-promise") \ + EM(afs_vnode_invalid_trace_vol_expired, "vol-expired") \ + EM(afs_vnode_invalid_trace_zap_data, "zap-data") \ + E_(afs_vnode_valid_trace, "valid") + +#define afs_dir_invalid_traces \ + EM(afs_dir_invalid_edit_add_bad_size, "edit-add-bad-size") \ + EM(afs_dir_invalid_edit_add_no_slots, "edit-add-no-slots") \ + EM(afs_dir_invalid_edit_add_too_many_blocks, "edit-add-too-many-blocks") \ + EM(afs_dir_invalid_edit_get_block, "edit-get-block") \ + EM(afs_dir_invalid_edit_mkdir, "edit-mkdir") \ + EM(afs_dir_invalid_edit_rem_bad_size, "edit-rem-bad-size") \ + EM(afs_dir_invalid_edit_rem_wrong_name, "edit-rem-wrong_name") \ + EM(afs_dir_invalid_edit_upd_bad_size, "edit-upd-bad-size") \ + EM(afs_dir_invalid_edit_upd_no_dd, "edit-upd-no-dotdot") \ + EM(afs_dir_invalid_dv_mismatch, "dv-mismatch") \ + EM(afs_dir_invalid_inval_folio, "inv-folio") \ + EM(afs_dir_invalid_iter_stale, "iter-stale") \ + EM(afs_dir_invalid_reclaimed_folio, "reclaimed-folio") \ + EM(afs_dir_invalid_release_folio, "rel-folio") \ + EM(afs_dir_invalid_remote, "remote") \ + E_(afs_dir_invalid_subdir_removed, "subdir-removed") + #define afs_edit_dir_ops \ EM(afs_edit_dir_create, "create") \ EM(afs_edit_dir_create_error, "c_fail") \ @@ -332,6 +374,7 @@ enum yfs_cm_operation { EM(afs_edit_dir_delete_error, "d_err ") \ EM(afs_edit_dir_delete_inval, "d_invl") \ EM(afs_edit_dir_delete_noent, "d_nent") \ + EM(afs_edit_dir_mkdir, "mk_ent") \ EM(afs_edit_dir_update_dd, "u_ddot") \ EM(afs_edit_dir_update_error, "u_fail") \ EM(afs_edit_dir_update_inval, "u_invl") \ @@ -385,6 +428,7 @@ enum yfs_cm_operation { EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \ EM(afs_file_error_dir_small, "DIR_SMALL") \ EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \ + EM(afs_file_error_symlink_big, "SYM_BIG") \ EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \ E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED") @@ -487,7 +531,9 @@ enum yfs_cm_operation { enum afs_alist_trace { afs_alist_traces } __mode(byte); enum afs_call_trace { afs_call_traces } __mode(byte); enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte); +enum afs_cb_promise_trace { afs_cb_promise_traces } __mode(byte); enum afs_cell_trace { afs_cell_traces } __mode(byte); +enum afs_dir_invalid_trace { afs_dir_invalid_traces} __mode(byte); enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte); enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte); enum afs_eproto_cause { afs_eproto_causes } __mode(byte); @@ -498,6 +544,7 @@ enum afs_flock_operation { afs_flock_operations } __mode(byte); enum afs_io_error { afs_io_errors } __mode(byte); enum afs_rotate_trace { afs_rotate_traces } __mode(byte); enum afs_server_trace { afs_server_traces } __mode(byte); +enum afs_vnode_invalid_trace { afs_vnode_invalid_traces} __mode(byte); enum afs_volume_trace { afs_volume_traces } __mode(byte); #endif /* end __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY */ @@ -513,8 +560,10 @@ enum afs_volume_trace { afs_volume_traces } __mode(byte); afs_alist_traces; afs_call_traces; afs_cb_break_reasons; +afs_cb_promise_traces; afs_cell_traces; afs_cm_operations; +afs_dir_invalid_traces; afs_edit_dir_ops; afs_edit_dir_reasons; afs_eproto_causes; @@ -526,6 +575,7 @@ afs_fs_operations; afs_io_errors; afs_rotate_traces; afs_server_traces; +afs_vnode_invalid_traces; afs_vl_operations; yfs_cm_operations; @@ -670,7 +720,7 @@ TRACE_EVENT(afs_make_fs_call, } ), - TP_printk("c=%08x %06llx:%06llx:%06x %s", + TP_printk("c=%08x V=%llx i=%llx:%x %s", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -704,7 +754,7 @@ TRACE_EVENT(afs_make_fs_calli, } ), - TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u", + TP_printk("c=%08x V=%llx i=%llx:%x %s i=%u", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -741,7 +791,7 @@ TRACE_EVENT(afs_make_fs_call1, __entry->name[__len] = 0; ), - TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"", + TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\"", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -782,7 +832,7 @@ TRACE_EVENT(afs_make_fs_call2, __entry->name2[__len2] = 0; ), - TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"", + TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\" \"%s\"", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -887,9 +937,9 @@ TRACE_EVENT(afs_sent_data, ); TRACE_EVENT(afs_dir_check_failed, - TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size), + TP_PROTO(struct afs_vnode *vnode, loff_t off), - TP_ARGS(vnode, off, i_size), + TP_ARGS(vnode, off), TP_STRUCT__entry( __field(struct afs_vnode *, vnode) @@ -900,7 +950,7 @@ TRACE_EVENT(afs_dir_check_failed, TP_fast_assign( __entry->vnode = vnode; __entry->off = off; - __entry->i_size = i_size; + __entry->i_size = i_size_read(&vnode->netfs.inode); ), TP_printk("vn=%p %llx/%llx", @@ -1002,7 +1052,7 @@ TRACE_EVENT(afs_edit_dir, __entry->name[__len] = 0; ), - TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"", + TP_printk("di=%x:%x %s %s %u[%u] fi=%x:%x \"%s\"", __entry->vnode, __entry->unique, __print_symbolic(__entry->why, afs_edit_dir_reasons), __print_symbolic(__entry->op, afs_edit_dir_ops), @@ -1011,6 +1061,122 @@ TRACE_EVENT(afs_edit_dir, __entry->name) ); +TRACE_EVENT(afs_dir_invalid, + TP_PROTO(const struct afs_vnode *dvnode, enum afs_dir_invalid_trace trace), + + TP_ARGS(dvnode, trace), + + TP_STRUCT__entry( + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(enum afs_dir_invalid_trace, trace) + ), + + TP_fast_assign( + __entry->vnode = dvnode->fid.vnode; + __entry->unique = dvnode->fid.unique; + __entry->trace = trace; + ), + + TP_printk("di=%x:%x %s", + __entry->vnode, __entry->unique, + __print_symbolic(__entry->trace, afs_dir_invalid_traces)) + ); + +TRACE_EVENT(afs_cb_promise, + TP_PROTO(const struct afs_vnode *vnode, enum afs_cb_promise_trace trace), + + TP_ARGS(vnode, trace), + + TP_STRUCT__entry( + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(enum afs_cb_promise_trace, trace) + ), + + TP_fast_assign( + __entry->vnode = vnode->fid.vnode; + __entry->unique = vnode->fid.unique; + __entry->trace = trace; + ), + + TP_printk("di=%x:%x %s", + __entry->vnode, __entry->unique, + __print_symbolic(__entry->trace, afs_cb_promise_traces)) + ); + +TRACE_EVENT(afs_vnode_invalid, + TP_PROTO(const struct afs_vnode *vnode, enum afs_vnode_invalid_trace trace), + + TP_ARGS(vnode, trace), + + TP_STRUCT__entry( + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(enum afs_vnode_invalid_trace, trace) + ), + + TP_fast_assign( + __entry->vnode = vnode->fid.vnode; + __entry->unique = vnode->fid.unique; + __entry->trace = trace; + ), + + TP_printk("di=%x:%x %s", + __entry->vnode, __entry->unique, + __print_symbolic(__entry->trace, afs_vnode_invalid_traces)) + ); + +TRACE_EVENT(afs_set_dv, + TP_PROTO(const struct afs_vnode *dvnode, u64 new_dv), + + TP_ARGS(dvnode, new_dv), + + TP_STRUCT__entry( + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(u64, old_dv) + __field(u64, new_dv) + ), + + TP_fast_assign( + __entry->vnode = dvnode->fid.vnode; + __entry->unique = dvnode->fid.unique; + __entry->old_dv = dvnode->status.data_version; + __entry->new_dv = new_dv; + ), + + TP_printk("di=%x:%x dv=%llx -> dv=%llx", + __entry->vnode, __entry->unique, + __entry->old_dv, __entry->new_dv) + ); + +TRACE_EVENT(afs_dv_mismatch, + TP_PROTO(const struct afs_vnode *dvnode, u64 before_dv, int delta, u64 new_dv), + + TP_ARGS(dvnode, before_dv, delta, new_dv), + + TP_STRUCT__entry( + __field(unsigned int, vnode) + __field(unsigned int, unique) + __field(int, delta) + __field(u64, before_dv) + __field(u64, new_dv) + ), + + TP_fast_assign( + __entry->vnode = dvnode->fid.vnode; + __entry->unique = dvnode->fid.unique; + __entry->delta = delta; + __entry->before_dv = before_dv; + __entry->new_dv = new_dv; + ), + + TP_printk("di=%x:%x xdv=%llx+%d dv=%llx", + __entry->vnode, __entry->unique, + __entry->before_dv, __entry->delta, __entry->new_dv) + ); + TRACE_EVENT(afs_protocol_error, TP_PROTO(struct afs_call *call, enum afs_eproto_cause cause), @@ -1611,6 +1777,36 @@ TRACE_EVENT(afs_make_call, __entry->fid.unique) ); +TRACE_EVENT(afs_read_recv, + TP_PROTO(const struct afs_operation *op, const struct afs_call *call), + + TP_ARGS(op, call), + + TP_STRUCT__entry( + __field(unsigned int, rreq) + __field(unsigned int, sreq) + __field(unsigned int, op) + __field(unsigned int, op_flags) + __field(unsigned int, call) + __field(enum afs_call_state, call_state) + ), + + TP_fast_assign( + __entry->op = op->debug_id; + __entry->sreq = op->fetch.subreq->debug_index; + __entry->rreq = op->fetch.subreq->rreq->debug_id; + __entry->op_flags = op->flags; + __entry->call = call->debug_id; + __entry->call_state = call->state; + ), + + TP_printk("R=%08x[%x] OP=%08x c=%08x cs=%x of=%x", + __entry->rreq, __entry->sreq, + __entry->op, + __entry->call, __entry->call_state, + __entry->op_flags) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h new file mode 100644 index 000000000000..c6cb2da7b706 --- /dev/null +++ b/include/trace/events/amdxdna.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amdxdna + +#if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_AMDXDNA_H + +#include <drm/gpu_scheduler.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(amdxdna_debug_point, + TP_PROTO(const char *name, u64 number, const char *str), + + TP_ARGS(name, number, str), + + TP_STRUCT__entry(__string(name, name) + __field(u64, number) + __string(str, str)), + + TP_fast_assign(__assign_str(name); + __entry->number = number; + __assign_str(str);), + + TP_printk("%s:%llu %s", __get_str(name), __entry->number, + __get_str(str)) +); + +TRACE_EVENT(xdna_job, + TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq), + + TP_ARGS(sched_job, name, str, seq), + + TP_STRUCT__entry(__string(name, name) + __string(str, str) + __field(u64, fence_context) + __field(u64, fence_seqno) + __field(u64, seq)), + + TP_fast_assign(__assign_str(name); + __assign_str(str); + __entry->fence_context = sched_job->s_fence->finished.context; + __entry->fence_seqno = sched_job->s_fence->finished.seqno; + __entry->seq = seq;), + + TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s", + __entry->fence_context, __entry->fence_seqno, + __get_str(name), __entry->seq, + __get_str(str)) +); + +DECLARE_EVENT_CLASS(xdna_mbox_msg, + TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id), + + TP_ARGS(name, chann_id, opcode, msg_id), + + TP_STRUCT__entry(__string(name, name) + __field(u32, chann_id) + __field(u32, opcode) + __field(u32, msg_id)), + + TP_fast_assign(__assign_str(name); + __entry->chann_id = chann_id; + __entry->opcode = opcode; + __entry->msg_id = msg_id;), + + TP_printk("%s.%d id 0x%x opcode 0x%x", __get_str(name), + __entry->chann_id, __entry->msg_id, __entry->opcode) +); + +DEFINE_EVENT(xdna_mbox_msg, mbox_set_tail, + TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 id), + TP_ARGS(name, chann_id, opcode, id) +); + +DEFINE_EVENT(xdna_mbox_msg, mbox_set_head, + TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 id), + TP_ARGS(name, chann_id, opcode, id) +); + +TRACE_EVENT(mbox_irq_handle, + TP_PROTO(char *name, int irq), + + TP_ARGS(name, irq), + + TP_STRUCT__entry(__string(name, name) + __field(int, irq)), + + TP_fast_assign(__assign_str(name); + __entry->irq = irq;), + + TP_printk("%s.%d", __get_str(name), __entry->irq) +); + +#endif /* !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4df93ca9b7a8..549ab3b41961 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -100,7 +100,8 @@ struct find_free_extent_ctl; EM( ALLOC_CHUNK, "ALLOC_CHUNK") \ EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ - EMe(COMMIT_TRANS, "COMMIT_TRANS") + EM( COMMIT_TRANS, "COMMIT_TRANS") \ + EMe(RESET_ZONES, "RESET_ZONES") /* * First define the enums in the above macros to be exported to userspace via diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 7d931db02b93..a743b2a35ea7 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -223,10 +223,10 @@ TRACE_EVENT(cachefiles_ref, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, cookie ) - __field(enum cachefiles_obj_ref_trace, why ) - __field(int, usage ) + __field(unsigned int, obj) + __field(unsigned int, cookie) + __field(enum cachefiles_obj_ref_trace, why) + __field(int, usage) ), TP_fast_assign( @@ -249,10 +249,10 @@ TRACE_EVENT(cachefiles_lookup, TP_ARGS(obj, dir, de), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(short, error ) - __field(unsigned long, dino ) - __field(unsigned long, ino ) + __field(unsigned int, obj) + __field(short, error) + __field(unsigned long, dino) + __field(unsigned long, ino) ), TP_fast_assign( @@ -273,8 +273,8 @@ TRACE_EVENT(cachefiles_mkdir, TP_ARGS(dir, subdir), TP_STRUCT__entry( - __field(unsigned int, dir ) - __field(unsigned int, subdir ) + __field(unsigned int, dir) + __field(unsigned int, subdir) ), TP_fast_assign( @@ -293,8 +293,8 @@ TRACE_EVENT(cachefiles_tmpfile, TP_ARGS(obj, backer), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) + __field(unsigned int, obj) + __field(unsigned int, backer) ), TP_fast_assign( @@ -313,8 +313,8 @@ TRACE_EVENT(cachefiles_link, TP_ARGS(obj, backer), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) + __field(unsigned int, obj) + __field(unsigned int, backer) ), TP_fast_assign( @@ -336,9 +336,9 @@ TRACE_EVENT(cachefiles_unlink, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, ino ) - __field(enum fscache_why_object_killed, why ) + __field(unsigned int, obj) + __field(unsigned int, ino) + __field(enum fscache_why_object_killed, why) ), TP_fast_assign( @@ -361,9 +361,9 @@ TRACE_EVENT(cachefiles_rename, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, ino ) - __field(enum fscache_why_object_killed, why ) + __field(unsigned int, obj) + __field(unsigned int, ino) + __field(enum fscache_why_object_killed, why) ), TP_fast_assign( @@ -380,17 +380,20 @@ TRACE_EVENT(cachefiles_rename, TRACE_EVENT(cachefiles_coherency, TP_PROTO(struct cachefiles_object *obj, ino_t ino, + u64 disk_aux, enum cachefiles_content content, enum cachefiles_coherency_trace why), - TP_ARGS(obj, ino, content, why), + TP_ARGS(obj, ino, disk_aux, content, why), /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(enum cachefiles_coherency_trace, why ) - __field(enum cachefiles_content, content ) - __field(u64, ino ) + __field(unsigned int, obj) + __field(enum cachefiles_coherency_trace, why) + __field(enum cachefiles_content, content) + __field(u64, ino) + __field(u64, aux) + __field(u64, disk_aux) ), TP_fast_assign( @@ -398,13 +401,17 @@ TRACE_EVENT(cachefiles_coherency, __entry->why = why; __entry->content = content; __entry->ino = ino; + __entry->aux = be64_to_cpup((__be64 *)obj->cookie->inline_aux); + __entry->disk_aux = disk_aux; ), - TP_printk("o=%08x %s B=%llx c=%u", + TP_printk("o=%08x %s B=%llx c=%u aux=%llx dsk=%llx", __entry->obj, __print_symbolic(__entry->why, cachefiles_coherency_traces), __entry->ino, - __entry->content) + __entry->content, + __entry->aux, + __entry->disk_aux) ); TRACE_EVENT(cachefiles_vol_coherency, @@ -416,9 +423,9 @@ TRACE_EVENT(cachefiles_vol_coherency, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, vol ) - __field(enum cachefiles_coherency_trace, why ) - __field(u64, ino ) + __field(unsigned int, vol) + __field(enum cachefiles_coherency_trace, why) + __field(u64, ino) ), TP_fast_assign( @@ -445,14 +452,14 @@ TRACE_EVENT(cachefiles_prep_read, TP_ARGS(obj, start, len, flags, source, why, cache_inode, netfs_inode), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned short, flags ) - __field(enum netfs_io_source, source ) - __field(enum cachefiles_prepare_read_trace, why ) - __field(size_t, len ) - __field(loff_t, start ) - __field(unsigned int, netfs_inode ) - __field(unsigned int, cache_inode ) + __field(unsigned int, obj) + __field(unsigned short, flags) + __field(enum netfs_io_source, source) + __field(enum cachefiles_prepare_read_trace, why) + __field(size_t, len) + __field(loff_t, start) + __field(unsigned int, netfs_inode) + __field(unsigned int, cache_inode) ), TP_fast_assign( @@ -484,10 +491,10 @@ TRACE_EVENT(cachefiles_read, TP_ARGS(obj, backer, start, len), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(size_t, len ) - __field(loff_t, start ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(size_t, len) + __field(loff_t, start) ), TP_fast_assign( @@ -513,10 +520,10 @@ TRACE_EVENT(cachefiles_write, TP_ARGS(obj, backer, start, len), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(size_t, len ) - __field(loff_t, start ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(size_t, len) + __field(loff_t, start) ), TP_fast_assign( @@ -540,11 +547,11 @@ TRACE_EVENT(cachefiles_trunc, TP_ARGS(obj, backer, from, to, why), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_trunc_trace, why ) - __field(loff_t, from ) - __field(loff_t, to ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(enum cachefiles_trunc_trace, why) + __field(loff_t, from) + __field(loff_t, to) ), TP_fast_assign( @@ -571,8 +578,8 @@ TRACE_EVENT(cachefiles_mark_active, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(unsigned int, obj) + __field(ino_t, inode) ), TP_fast_assign( @@ -592,8 +599,8 @@ TRACE_EVENT(cachefiles_mark_failed, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(unsigned int, obj) + __field(ino_t, inode) ), TP_fast_assign( @@ -613,8 +620,8 @@ TRACE_EVENT(cachefiles_mark_inactive, /* Note that obj may be NULL */ TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(ino_t, inode ) + __field(unsigned int, obj) + __field(ino_t, inode) ), TP_fast_assign( @@ -633,10 +640,10 @@ TRACE_EVENT(cachefiles_vfs_error, TP_ARGS(obj, backer, error, where), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_error_trace, where ) - __field(short, error ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(enum cachefiles_error_trace, where) + __field(short, error) ), TP_fast_assign( @@ -660,10 +667,10 @@ TRACE_EVENT(cachefiles_io_error, TP_ARGS(obj, backer, error, where), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(enum cachefiles_error_trace, where ) - __field(short, error ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(enum cachefiles_error_trace, where) + __field(short, error) ), TP_fast_assign( @@ -687,11 +694,11 @@ TRACE_EVENT(cachefiles_ondemand_open, TP_ARGS(obj, msg, load), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, msg_id ) - __field(unsigned int, object_id ) - __field(unsigned int, fd ) - __field(unsigned int, flags ) + __field(unsigned int, obj) + __field(unsigned int, msg_id) + __field(unsigned int, object_id) + __field(unsigned int, fd) + __field(unsigned int, flags) ), TP_fast_assign( @@ -717,9 +724,9 @@ TRACE_EVENT(cachefiles_ondemand_copen, TP_ARGS(obj, msg_id, len), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, msg_id ) - __field(long, len ) + __field(unsigned int, obj) + __field(unsigned int, msg_id) + __field(long, len) ), TP_fast_assign( @@ -740,9 +747,9 @@ TRACE_EVENT(cachefiles_ondemand_close, TP_ARGS(obj, msg), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, msg_id ) - __field(unsigned int, object_id ) + __field(unsigned int, obj) + __field(unsigned int, msg_id) + __field(unsigned int, object_id) ), TP_fast_assign( @@ -764,11 +771,11 @@ TRACE_EVENT(cachefiles_ondemand_read, TP_ARGS(obj, msg, load), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, msg_id ) - __field(unsigned int, object_id ) - __field(loff_t, start ) - __field(size_t, len ) + __field(unsigned int, obj) + __field(unsigned int, msg_id) + __field(unsigned int, object_id) + __field(loff_t, start) + __field(size_t, len) ), TP_fast_assign( @@ -793,8 +800,8 @@ TRACE_EVENT(cachefiles_ondemand_cread, TP_ARGS(obj, msg_id), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, msg_id ) + __field(unsigned int, obj) + __field(unsigned int, msg_id) ), TP_fast_assign( @@ -814,10 +821,10 @@ TRACE_EVENT(cachefiles_ondemand_fd_write, TP_ARGS(obj, backer, start, len), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, backer ) - __field(loff_t, start ) - __field(size_t, len ) + __field(unsigned int, obj) + __field(unsigned int, backer) + __field(loff_t, start) + __field(size_t, len) ), TP_fast_assign( @@ -840,8 +847,8 @@ TRACE_EVENT(cachefiles_ondemand_fd_release, TP_ARGS(obj, object_id), TP_STRUCT__entry( - __field(unsigned int, obj ) - __field(unsigned int, object_id ) + __field(unsigned int, obj) + __field(unsigned int, object_id) ), TP_fast_assign( diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index bb8a59c6caa2..d36c857dd249 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -13,6 +13,69 @@ * Thus most bits set go first. */ +/* These define the values that are enums (the bits) */ +#define TRACE_GFP_FLAGS_GENERAL \ + TRACE_GFP_EM(DMA) \ + TRACE_GFP_EM(HIGHMEM) \ + TRACE_GFP_EM(DMA32) \ + TRACE_GFP_EM(MOVABLE) \ + TRACE_GFP_EM(RECLAIMABLE) \ + TRACE_GFP_EM(HIGH) \ + TRACE_GFP_EM(IO) \ + TRACE_GFP_EM(FS) \ + TRACE_GFP_EM(ZERO) \ + TRACE_GFP_EM(DIRECT_RECLAIM) \ + TRACE_GFP_EM(KSWAPD_RECLAIM) \ + TRACE_GFP_EM(WRITE) \ + TRACE_GFP_EM(NOWARN) \ + TRACE_GFP_EM(RETRY_MAYFAIL) \ + TRACE_GFP_EM(NOFAIL) \ + TRACE_GFP_EM(NORETRY) \ + TRACE_GFP_EM(MEMALLOC) \ + TRACE_GFP_EM(COMP) \ + TRACE_GFP_EM(NOMEMALLOC) \ + TRACE_GFP_EM(HARDWALL) \ + TRACE_GFP_EM(THISNODE) \ + TRACE_GFP_EM(ACCOUNT) \ + TRACE_GFP_EM(ZEROTAGS) + +#ifdef CONFIG_KASAN_HW_TAGS +# define TRACE_GFP_FLAGS_KASAN \ + TRACE_GFP_EM(SKIP_ZERO) \ + TRACE_GFP_EM(SKIP_KASAN) +#else +# define TRACE_GFP_FLAGS_KASAN +#endif + +#ifdef CONFIG_LOCKDEP +# define TRACE_GFP_FLAGS_LOCKDEP \ + TRACE_GFP_EM(NOLOCKDEP) +#else +# define TRACE_GFP_FLAGS_LOCKDEP +#endif + +#ifdef CONFIG_SLAB_OBJ_EXT +# define TRACE_GFP_FLAGS_SLAB \ + TRACE_GFP_EM(NO_OBJ_EXT) +#else +# define TRACE_GFP_FLAGS_SLAB +#endif + +#define TRACE_GFP_FLAGS \ + TRACE_GFP_FLAGS_GENERAL \ + TRACE_GFP_FLAGS_KASAN \ + TRACE_GFP_FLAGS_LOCKDEP \ + TRACE_GFP_FLAGS_SLAB + +#undef TRACE_GFP_EM +#define TRACE_GFP_EM(a) TRACE_DEFINE_ENUM(___GFP_##a##_BIT); + +TRACE_GFP_FLAGS + +/* Just in case these are ever used */ +TRACE_DEFINE_ENUM(___GFP_UNUSED_BIT); +TRACE_DEFINE_ENUM(___GFP_LAST_BIT); + #define gfpflag_string(flag) {(__force unsigned long)flag, #flag} #define __def_gfpflag_names \ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index bf511bca896e..6e699cadcb29 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -21,6 +21,7 @@ EM(netfs_read_trace_readahead, "READAHEAD") \ EM(netfs_read_trace_readpage, "READPAGE ") \ EM(netfs_read_trace_read_gaps, "READ-GAPS") \ + EM(netfs_read_trace_read_single, "READ-SNGL") \ EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \ E_(netfs_read_trace_write_begin, "WRITEBEGN") @@ -35,9 +36,11 @@ EM(NETFS_READAHEAD, "RA") \ EM(NETFS_READPAGE, "RP") \ EM(NETFS_READ_GAPS, "RG") \ + EM(NETFS_READ_SINGLE, "R1") \ EM(NETFS_READ_FOR_WRITE, "RW") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ + EM(NETFS_WRITEBACK_SINGLE, "W1") \ EM(NETFS_WRITETHROUGH, "WT") \ EM(NETFS_UNBUFFERED_WRITE, "UW") \ EM(NETFS_DIO_WRITE, "DW") \ @@ -47,17 +50,23 @@ EM(netfs_rreq_trace_assess, "ASSESS ") \ EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ + EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ + EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ + EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ + EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ + EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") @@ -74,6 +83,10 @@ #define netfs_sreq_traces \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ + EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ + EM(netfs_sreq_trace_cache_prepare, "CA-PR") \ + EM(netfs_sreq_trace_cache_write, "CA-WR") \ + EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ @@ -84,6 +97,9 @@ EM(netfs_sreq_trace_hit_eof, "EOF ") \ EM(netfs_sreq_trace_io_progress, "IO ") \ EM(netfs_sreq_trace_limited, "LIMIT") \ + EM(netfs_sreq_trace_need_clear, "N-CLR") \ + EM(netfs_sreq_trace_partial_read, "PARTR") \ + EM(netfs_sreq_trace_need_retry, "NRTRY") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \ EM(netfs_sreq_trace_progress, "PRGRS") \ @@ -129,6 +145,7 @@ EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ + EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \ EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \ EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \ @@ -152,6 +169,7 @@ EM(netfs_streaming_filled_page, "mod-streamw-f") \ EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \ EM(netfs_folio_trace_abandon, "abandon") \ + EM(netfs_folio_trace_alloc_buffer, "alloc-buf") \ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \ EM(netfs_folio_trace_cancel_store, "cancel-store") \ EM(netfs_folio_trace_clear, "clear") \ @@ -168,6 +186,7 @@ EM(netfs_folio_trace_mkwrite, "mkwrite") \ EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \ EM(netfs_folio_trace_not_under_wback, "!wback") \ + EM(netfs_folio_trace_not_locked, "!locked") \ EM(netfs_folio_trace_put, "put") \ EM(netfs_folio_trace_read, "read") \ EM(netfs_folio_trace_read_done, "read-done") \ @@ -191,6 +210,14 @@ EM(netfs_trace_donate_to_next, "to-next") \ E_(netfs_trace_donate_to_deferred_next, "defer-next") +#define netfs_folioq_traces \ + EM(netfs_trace_folioq_alloc_buffer, "alloc-buf") \ + EM(netfs_trace_folioq_clear, "clear") \ + EM(netfs_trace_folioq_delete, "delete") \ + EM(netfs_trace_folioq_make_space, "make-space") \ + EM(netfs_trace_folioq_rollbuf_init, "roll-init") \ + E_(netfs_trace_folioq_read_progress, "r-progress") + #ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY #define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY @@ -209,6 +236,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte); enum netfs_folio_trace { netfs_folio_traces } __mode(byte); enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte); enum netfs_donate_trace { netfs_donate_traces } __mode(byte); +enum netfs_folioq_trace { netfs_folioq_traces } __mode(byte); #endif @@ -232,6 +260,7 @@ netfs_sreq_ref_traces; netfs_folio_traces; netfs_collect_contig_traces; netfs_donate_traces; +netfs_folioq_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -250,13 +279,13 @@ TRACE_EVENT(netfs_read, TP_ARGS(rreq, start, len, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(unsigned int, cookie ) - __field(loff_t, i_size ) - __field(loff_t, start ) - __field(size_t, len ) - __field(enum netfs_read_trace, what ) - __field(unsigned int, netfs_inode ) + __field(unsigned int, rreq) + __field(unsigned int, cookie) + __field(loff_t, i_size) + __field(loff_t, start) + __field(size_t, len) + __field(enum netfs_read_trace, what) + __field(unsigned int, netfs_inode) ), TP_fast_assign( @@ -284,10 +313,10 @@ TRACE_EVENT(netfs_rreq, TP_ARGS(rreq, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(unsigned int, flags ) - __field(enum netfs_io_origin, origin ) - __field(enum netfs_rreq_trace, what ) + __field(unsigned int, rreq) + __field(unsigned int, flags) + __field(enum netfs_io_origin, origin) + __field(enum netfs_rreq_trace, what) ), TP_fast_assign( @@ -311,15 +340,16 @@ TRACE_EVENT(netfs_sreq, TP_ARGS(sreq, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(unsigned short, index ) - __field(short, error ) - __field(unsigned short, flags ) - __field(enum netfs_io_source, source ) - __field(enum netfs_sreq_trace, what ) - __field(size_t, len ) - __field(size_t, transferred ) - __field(loff_t, start ) + __field(unsigned int, rreq) + __field(unsigned short, index) + __field(short, error) + __field(unsigned short, flags) + __field(enum netfs_io_source, source) + __field(enum netfs_sreq_trace, what) + __field(u8, slot) + __field(size_t, len) + __field(size_t, transferred) + __field(loff_t, start) ), TP_fast_assign( @@ -332,15 +362,16 @@ TRACE_EVENT(netfs_sreq, __entry->len = sreq->len; __entry->transferred = sreq->transferred; __entry->start = sreq->start; + __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d", + TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), __entry->flags, __entry->start, __entry->transferred, __entry->len, - __entry->error) + __entry->slot, __entry->error) ); TRACE_EVENT(netfs_failure, @@ -351,15 +382,15 @@ TRACE_EVENT(netfs_failure, TP_ARGS(rreq, sreq, error, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(short, index ) - __field(short, error ) - __field(unsigned short, flags ) - __field(enum netfs_io_source, source ) - __field(enum netfs_failure, what ) - __field(size_t, len ) - __field(size_t, transferred ) - __field(loff_t, start ) + __field(unsigned int, rreq) + __field(short, index) + __field(short, error) + __field(unsigned short, flags) + __field(enum netfs_io_source, source) + __field(enum netfs_failure, what) + __field(size_t, len) + __field(size_t, transferred) + __field(loff_t, start) ), TP_fast_assign( @@ -390,9 +421,9 @@ TRACE_EVENT(netfs_rreq_ref, TP_ARGS(rreq_debug_id, ref, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(int, ref ) - __field(enum netfs_rreq_ref_trace, what ) + __field(unsigned int, rreq) + __field(int, ref) + __field(enum netfs_rreq_ref_trace, what) ), TP_fast_assign( @@ -414,10 +445,10 @@ TRACE_EVENT(netfs_sreq_ref, TP_ARGS(rreq_debug_id, subreq_debug_index, ref, what), TP_STRUCT__entry( - __field(unsigned int, rreq ) - __field(unsigned int, subreq ) - __field(int, ref ) - __field(enum netfs_sreq_ref_trace, what ) + __field(unsigned int, rreq) + __field(unsigned int, subreq) + __field(int, ref) + __field(enum netfs_sreq_ref_trace, what) ), TP_fast_assign( @@ -465,10 +496,10 @@ TRACE_EVENT(netfs_write_iter, TP_ARGS(iocb, from), TP_STRUCT__entry( - __field(unsigned long long, start ) - __field(size_t, len ) - __field(unsigned int, flags ) - __field(unsigned int, ino ) + __field(unsigned long long, start) + __field(size_t, len) + __field(unsigned int, flags) + __field(unsigned int, ino) ), TP_fast_assign( @@ -489,12 +520,12 @@ TRACE_EVENT(netfs_write, TP_ARGS(wreq, what), TP_STRUCT__entry( - __field(unsigned int, wreq ) - __field(unsigned int, cookie ) - __field(unsigned int, ino ) - __field(enum netfs_write_trace, what ) - __field(unsigned long long, start ) - __field(unsigned long long, len ) + __field(unsigned int, wreq) + __field(unsigned int, cookie) + __field(unsigned int, ino) + __field(enum netfs_write_trace, what) + __field(unsigned long long, start) + __field(unsigned long long, len) ), TP_fast_assign( @@ -522,10 +553,10 @@ TRACE_EVENT(netfs_collect, TP_ARGS(wreq), TP_STRUCT__entry( - __field(unsigned int, wreq ) - __field(unsigned int, len ) - __field(unsigned long long, transferred ) - __field(unsigned long long, start ) + __field(unsigned int, wreq) + __field(unsigned int, len) + __field(unsigned long long, transferred) + __field(unsigned long long, start) ), TP_fast_assign( @@ -548,12 +579,12 @@ TRACE_EVENT(netfs_collect_sreq, TP_ARGS(wreq, subreq), TP_STRUCT__entry( - __field(unsigned int, wreq ) - __field(unsigned int, subreq ) - __field(unsigned int, stream ) - __field(unsigned int, len ) - __field(unsigned int, transferred ) - __field(unsigned long long, start ) + __field(unsigned int, wreq) + __field(unsigned int, subreq) + __field(unsigned int, stream) + __field(unsigned int, len) + __field(unsigned int, transferred) + __field(unsigned long long, start) ), TP_fast_assign( @@ -579,11 +610,11 @@ TRACE_EVENT(netfs_collect_folio, TP_ARGS(wreq, folio, fend, collected_to), TP_STRUCT__entry( - __field(unsigned int, wreq ) - __field(unsigned long, index ) - __field(unsigned long long, fend ) - __field(unsigned long long, cleaned_to ) - __field(unsigned long long, collected_to ) + __field(unsigned int, wreq) + __field(unsigned long, index) + __field(unsigned long long, fend) + __field(unsigned long long, cleaned_to) + __field(unsigned long long, collected_to) ), TP_fast_assign( @@ -608,10 +639,10 @@ TRACE_EVENT(netfs_collect_state, TP_ARGS(wreq, collected_to, notes), TP_STRUCT__entry( - __field(unsigned int, wreq ) - __field(unsigned int, notes ) - __field(unsigned long long, collected_to ) - __field(unsigned long long, cleaned_to ) + __field(unsigned int, wreq) + __field(unsigned int, notes) + __field(unsigned long long, collected_to) + __field(unsigned long long, cleaned_to) ), TP_fast_assign( @@ -680,69 +711,27 @@ TRACE_EVENT(netfs_collect_stream, __entry->collected_to, __entry->front) ); -TRACE_EVENT(netfs_progress, - TP_PROTO(const struct netfs_io_subrequest *subreq, - unsigned long long start, size_t avail, size_t part), +TRACE_EVENT(netfs_folioq, + TP_PROTO(const struct folio_queue *fq, + enum netfs_folioq_trace trace), - TP_ARGS(subreq, start, avail, part), + TP_ARGS(fq, trace), TP_STRUCT__entry( __field(unsigned int, rreq) - __field(unsigned int, subreq) - __field(unsigned int, consumed) - __field(unsigned int, transferred) - __field(unsigned long long, f_start) - __field(unsigned int, f_avail) - __field(unsigned int, f_part) - __field(unsigned char, slot) + __field(unsigned int, id) + __field(enum netfs_folioq_trace, trace) ), TP_fast_assign( - __entry->rreq = subreq->rreq->debug_id; - __entry->subreq = subreq->debug_index; - __entry->consumed = subreq->consumed; - __entry->transferred = subreq->transferred; - __entry->f_start = start; - __entry->f_avail = avail; - __entry->f_part = part; - __entry->slot = subreq->curr_folioq_slot; - ), - - TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x", - __entry->rreq, __entry->subreq, __entry->f_start, - __entry->consumed, __entry->transferred, - __entry->f_part, __entry->f_avail, __entry->slot) - ); - -TRACE_EVENT(netfs_donate, - TP_PROTO(const struct netfs_io_request *rreq, - const struct netfs_io_subrequest *from, - const struct netfs_io_subrequest *to, - size_t amount, - enum netfs_donate_trace trace), - - TP_ARGS(rreq, from, to, amount, trace), - - TP_STRUCT__entry( - __field(unsigned int, rreq) - __field(unsigned int, from) - __field(unsigned int, to) - __field(unsigned int, amount) - __field(enum netfs_donate_trace, trace) - ), - - TP_fast_assign( - __entry->rreq = rreq->debug_id; - __entry->from = from->debug_index; - __entry->to = to ? to->debug_index : -1; - __entry->amount = amount; + __entry->rreq = fq ? fq->rreq_id : 0; + __entry->id = fq ? fq->debug_id : 0; __entry->trace = trace; ), - TP_printk("R=%08x[%02x] -> [%02x] %s am=%x", - __entry->rreq, __entry->from, __entry->to, - __print_symbolic(__entry->trace, netfs_donate_traces), - __entry->amount) + TP_printk("R=%08x fq=%x %s", + __entry->rreq, __entry->id, + __print_symbolic(__entry->trace, netfs_folioq_traces)) ); #undef EM diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h new file mode 100644 index 000000000000..a706ead39082 --- /dev/null +++ b/include/uapi/drm/amdxdna_accel.h @@ -0,0 +1,501 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _UAPI_AMDXDNA_ACCEL_H_ +#define _UAPI_AMDXDNA_ACCEL_H_ + +#include <linux/stddef.h> +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) +#define AMDXDNA_INVALID_ADDR (~0UL) +#define AMDXDNA_INVALID_CTX_HANDLE 0 +#define AMDXDNA_INVALID_BO_HANDLE 0 +#define AMDXDNA_INVALID_FENCE_HANDLE 0 + +enum amdxdna_device_type { + AMDXDNA_DEV_TYPE_UNKNOWN = -1, + AMDXDNA_DEV_TYPE_KMQ, +}; + +enum amdxdna_drm_ioctl_id { + DRM_AMDXDNA_CREATE_HWCTX, + DRM_AMDXDNA_DESTROY_HWCTX, + DRM_AMDXDNA_CONFIG_HWCTX, + DRM_AMDXDNA_CREATE_BO, + DRM_AMDXDNA_GET_BO_INFO, + DRM_AMDXDNA_SYNC_BO, + DRM_AMDXDNA_EXEC_CMD, + DRM_AMDXDNA_GET_INFO, + DRM_AMDXDNA_SET_STATE, +}; + +/** + * struct qos_info - QoS information for driver. + * @gops: Giga operations per second. + * @fps: Frames per second. + * @dma_bandwidth: DMA bandwidtha. + * @latency: Frame response latency. + * @frame_exec_time: Frame execution time. + * @priority: Request priority. + * + * User program can provide QoS hints to driver. + */ +struct amdxdna_qos_info { + __u32 gops; + __u32 fps; + __u32 dma_bandwidth; + __u32 latency; + __u32 frame_exec_time; + __u32 priority; +}; + +/** + * struct amdxdna_drm_create_hwctx - Create hardware context. + * @ext: MBZ. + * @ext_flags: MBZ. + * @qos_p: Address of QoS info. + * @umq_bo: BO handle for user mode queue(UMQ). + * @log_buf_bo: BO handle for log buffer. + * @max_opc: Maximum operations per cycle. + * @num_tiles: Number of AIE tiles. + * @mem_size: Size of AIE tile memory. + * @umq_doorbell: Returned offset of doorbell associated with UMQ. + * @handle: Returned hardware context handle. + * @syncobj_handle: Returned syncobj handle for command completion. + */ +struct amdxdna_drm_create_hwctx { + __u64 ext; + __u64 ext_flags; + __u64 qos_p; + __u32 umq_bo; + __u32 log_buf_bo; + __u32 max_opc; + __u32 num_tiles; + __u32 mem_size; + __u32 umq_doorbell; + __u32 handle; + __u32 syncobj_handle; +}; + +/** + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context. + * @handle: Hardware context handle. + * @pad: MBZ. + */ +struct amdxdna_drm_destroy_hwctx { + __u32 handle; + __u32 pad; +}; + +/** + * struct amdxdna_cu_config - configuration for one CU + * @cu_bo: CU configuration buffer bo handle. + * @cu_func: Function of a CU. + * @pad: MBZ. + */ +struct amdxdna_cu_config { + __u32 cu_bo; + __u8 cu_func; + __u8 pad[3]; +}; + +/** + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware context + * @num_cus: Number of CUs to configure. + * @pad: MBZ. + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config. + */ +struct amdxdna_hwctx_param_config_cu { + __u16 num_cus; + __u16 pad[3]; + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +}; + +enum amdxdna_drm_config_hwctx_param { + DRM_AMDXDNA_HWCTX_CONFIG_CU, + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF, + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF, +}; + +/** + * struct amdxdna_drm_config_hwctx - Configure hardware context. + * @handle: hardware context handle. + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the + * structure passed in via param_val. + * @param_val: A structure specified by the param_type struct member. + * @param_val_size: Size of the parameter buffer pointed to by the param_val. + * If param_val is not a pointer, driver can ignore this. + * @pad: MBZ. + * + * Note: if the param_val is a pointer pointing to a buffer, the maximum size + * of the buffer is 4KiB(PAGE_SIZE). + */ +struct amdxdna_drm_config_hwctx { + __u32 handle; + __u32 param_type; + __u64 param_val; + __u32 param_val_size; + __u32 pad; +}; + +enum amdxdna_bo_type { + AMDXDNA_BO_INVALID = 0, + AMDXDNA_BO_SHMEM, + AMDXDNA_BO_DEV_HEAP, + AMDXDNA_BO_DEV, + AMDXDNA_BO_CMD, +}; + +/** + * struct amdxdna_drm_create_bo - Create a buffer object. + * @flags: Buffer flags. MBZ. + * @vaddr: User VA of buffer if applied. MBZ. + * @size: Size in bytes. + * @type: Buffer type. + * @handle: Returned DRM buffer object handle. + */ +struct amdxdna_drm_create_bo { + __u64 flags; + __u64 vaddr; + __u64 size; + __u32 type; + __u32 handle; +}; + +/** + * struct amdxdna_drm_get_bo_info - Get buffer object information. + * @ext: MBZ. + * @ext_flags: MBZ. + * @handle: DRM buffer object handle. + * @pad: MBZ. + * @map_offset: Returned DRM fake offset for mmap(). + * @vaddr: Returned user VA of buffer. 0 in case user needs mmap(). + * @xdna_addr: Returned XDNA device virtual address. + */ +struct amdxdna_drm_get_bo_info { + __u64 ext; + __u64 ext_flags; + __u32 handle; + __u32 pad; + __u64 map_offset; + __u64 vaddr; + __u64 xdna_addr; +}; + +/** + * struct amdxdna_drm_sync_bo - Sync buffer object. + * @handle: Buffer object handle. + * @direction: Direction of sync, can be from device or to device. + * @offset: Offset in the buffer to sync. + * @size: Size in bytes. + */ +struct amdxdna_drm_sync_bo { + __u32 handle; +#define SYNC_DIRECT_TO_DEVICE 0U +#define SYNC_DIRECT_FROM_DEVICE 1U + __u32 direction; + __u64 offset; + __u64 size; +}; + +enum amdxdna_cmd_type { + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, + AMDXDNA_CMD_SUBMIT_DEPENDENCY, + AMDXDNA_CMD_SUBMIT_SIGNAL, +}; + +/** + * struct amdxdna_drm_exec_cmd - Execute command. + * @ext: MBZ. + * @ext_flags: MBZ. + * @hwctx: Hardware context handle. + * @type: One of command type in enum amdxdna_cmd_type. + * @cmd_handles: Array of command handles or the command handle itself + * in case of just one. + * @args: Array of arguments for all command handles. + * @cmd_count: Number of command handles in the cmd_handles array. + * @arg_count: Number of arguments in the args array. + * @seq: Returned sequence number for this command. + */ +struct amdxdna_drm_exec_cmd { + __u64 ext; + __u64 ext_flags; + __u32 hwctx; + __u32 type; + __u64 cmd_handles; + __u64 args; + __u32 cmd_count; + __u32 arg_count; + __u64 seq; +}; + +/** + * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware + * @buffer: The user space buffer that will return the AIE status. + * @buffer_size: The size of the user space buffer. + * @cols_filled: A bitmap of AIE columns whose data has been returned in the buffer. + */ +struct amdxdna_drm_query_aie_status { + __u64 buffer; /* out */ + __u32 buffer_size; /* in */ + __u32 cols_filled; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_version - Query the version of the AIE hardware + * @major: The major version number. + * @minor: The minor version number. + */ +struct amdxdna_drm_query_aie_version { + __u32 major; /* out */ + __u32 minor; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_tile_metadata - Query the metadata of AIE tile (core, mem, shim) + * @row_count: The number of rows. + * @row_start: The starting row number. + * @dma_channel_count: The number of dma channels. + * @lock_count: The number of locks. + * @event_reg_count: The number of events. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_aie_tile_metadata { + __u16 row_count; + __u16 row_start; + __u16 dma_channel_count; + __u16 lock_count; + __u16 event_reg_count; + __u16 pad[3]; +}; + +/** + * struct amdxdna_drm_query_aie_metadata - Query the metadata of the AIE hardware + * @col_size: The size of a column in bytes. + * @cols: The total number of columns. + * @rows: The total number of rows. + * @version: The version of the AIE hardware. + * @core: The metadata for all core tiles. + * @mem: The metadata for all mem tiles. + * @shim: The metadata for all shim tiles. + */ +struct amdxdna_drm_query_aie_metadata { + __u32 col_size; + __u16 cols; + __u16 rows; + struct amdxdna_drm_query_aie_version version; + struct amdxdna_drm_query_aie_tile_metadata core; + struct amdxdna_drm_query_aie_tile_metadata mem; + struct amdxdna_drm_query_aie_tile_metadata shim; +}; + +/** + * struct amdxdna_drm_query_clock - Metadata for a clock + * @name: The clock name. + * @freq_mhz: The clock frequency. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_clock { + __u8 name[16]; + __u32 freq_mhz; + __u32 pad; +}; + +/** + * struct amdxdna_drm_query_clock_metadata - Query metadata for clocks + * @mp_npu_clock: The metadata for MP-NPU clock. + * @h_clock: The metadata for H clock. + */ +struct amdxdna_drm_query_clock_metadata { + struct amdxdna_drm_query_clock mp_npu_clock; + struct amdxdna_drm_query_clock h_clock; +}; + +enum amdxdna_sensor_type { + AMDXDNA_SENSOR_TYPE_POWER +}; + +/** + * struct amdxdna_drm_query_sensor - The data for single sensor. + * @label: The name for a sensor. + * @input: The current value of the sensor. + * @max: The maximum value possible for the sensor. + * @average: The average value of the sensor. + * @highest: The highest recorded sensor value for this driver load for the sensor. + * @status: The sensor status. + * @units: The sensor units. + * @unitm: Translates value member variables into the correct unit via (pow(10, unitm) * value). + * @type: The sensor type from enum amdxdna_sensor_type. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_sensor { + __u8 label[64]; + __u32 input; + __u32 max; + __u32 average; + __u32 highest; + __u8 status[64]; + __u8 units[16]; + __s8 unitm; + __u8 type; + __u8 pad[6]; +}; + +/** + * struct amdxdna_drm_query_hwctx - The data for single context. + * @context_id: The ID for this context. + * @start_col: The starting column for the partition assigned to this context. + * @num_col: The number of columns in the partition assigned to this context. + * @pad: Structure padding. + * @pid: The Process ID of the process that created this context. + * @command_submissions: The number of commands submitted to this context. + * @command_completions: The number of commands completed by this context. + * @migrations: The number of times this context has been moved to a different partition. + * @preemptions: The number of times this context has been preempted by another context in the + * same partition. + * @errors: The errors for this context. + */ +struct amdxdna_drm_query_hwctx { + __u32 context_id; + __u32 start_col; + __u32 num_col; + __u32 pad; + __s64 pid; + __u64 command_submissions; + __u64 command_completions; + __u64 migrations; + __u64 preemptions; + __u64 errors; +}; + +enum amdxdna_power_mode_type { + POWER_MODE_DEFAULT, /* Fallback to calculated DPM */ + POWER_MODE_LOW, /* Set frequency to lowest DPM */ + POWER_MODE_MEDIUM, /* Set frequency to medium DPM */ + POWER_MODE_HIGH, /* Set frequency to highest DPM */ + POWER_MODE_TURBO, /* Maximum power */ +}; + +/** + * struct amdxdna_drm_get_power_mode - Get the configured power mode + * @power_mode: The mode type from enum amdxdna_power_mode_type + * @pad: Structure padding. + */ +struct amdxdna_drm_get_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +/** + * struct amdxdna_drm_query_firmware_version - Query the firmware version + * @major: The major version number + * @minor: The minor version number + * @patch: The patch level version number + * @build: The build ID + */ +struct amdxdna_drm_query_firmware_version { + __u32 major; /* out */ + __u32 minor; /* out */ + __u32 patch; /* out */ + __u32 build; /* out */ +}; + +enum amdxdna_drm_get_param { + DRM_AMDXDNA_QUERY_AIE_STATUS, + DRM_AMDXDNA_QUERY_AIE_METADATA, + DRM_AMDXDNA_QUERY_AIE_VERSION, + DRM_AMDXDNA_QUERY_CLOCK_METADATA, + DRM_AMDXDNA_QUERY_SENSORS, + DRM_AMDXDNA_QUERY_HW_CONTEXTS, + DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, + DRM_AMDXDNA_GET_POWER_MODE, +}; + +/** + * struct amdxdna_drm_get_info - Get some information from the AIE hardware. + * @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed in the buffer. + * @buffer_size: Size of the input buffer. Size needed/written by the kernel. + * @buffer: A structure specified by the param struct member. + */ +struct amdxdna_drm_get_info { + __u32 param; /* in */ + __u32 buffer_size; /* in/out */ + __u64 buffer; /* in/out */ +}; + +enum amdxdna_drm_set_param { + DRM_AMDXDNA_SET_POWER_MODE, + DRM_AMDXDNA_WRITE_AIE_MEM, + DRM_AMDXDNA_WRITE_AIE_REG, +}; + +/** + * struct amdxdna_drm_set_state - Set the state of the AIE hardware. + * @param: Value in enum amdxdna_drm_set_param. + * @buffer_size: Size of the input param. + * @buffer: Pointer to the input param. + */ +struct amdxdna_drm_set_state { + __u32 param; /* in */ + __u32 buffer_size; /* in */ + __u64 buffer; /* in */ +}; + +/** + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware + * @power_mode: The sensor type from enum amdxdna_power_mode_type + * @pad: MBZ. + */ +struct amdxdna_drm_set_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ + struct amdxdna_drm_create_hwctx) + +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \ + struct amdxdna_drm_destroy_hwctx) + +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \ + struct amdxdna_drm_config_hwctx) + +#define DRM_IOCTL_AMDXDNA_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_BO, \ + struct amdxdna_drm_create_bo) + +#define DRM_IOCTL_AMDXDNA_GET_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_BO_INFO, \ + struct amdxdna_drm_get_bo_info) + +#define DRM_IOCTL_AMDXDNA_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ + struct amdxdna_drm_sync_bo) + +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ + struct amdxdna_drm_exec_cmd) + +#define DRM_IOCTL_AMDXDNA_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \ + struct amdxdna_drm_get_info) + +#define DRM_IOCTL_AMDXDNA_SET_STATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \ + struct amdxdna_drm_set_state) + +#if defined(__cplusplus) +} /* extern c end */ +#endif + +#endif /* _UAPI_AMDXDNA_ACCEL_H_ */ diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index b916aab80dde..2342cb90857e 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -90,6 +90,7 @@ struct drm_msm_timespec { #define MSM_PARAM_RAYTRACING 0x11 /* RO */ #define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ #define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ +#define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 87c9cb555dd1..b99763cbae48 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -923,6 +923,15 @@ enum drm_panthor_group_state_flags { * When a group ends up with this flag set, no jobs can be submitted to its queues. */ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, + + /** + * @DRM_PANTHOR_GROUP_STATE_INNOCENT: Group was killed during a reset caused by other + * groups. + * + * This flag can only be set if DRM_PANTHOR_GROUP_STATE_TIMEDOUT is set and + * DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not. + */ + DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2, }; /** diff --git a/include/uapi/drm/qaic_accel.h b/include/uapi/drm/qaic_accel.h index d3ca876a08e9..c92d0309d583 100644 --- a/include/uapi/drm/qaic_accel.h +++ b/include/uapi/drm/qaic_accel.h @@ -64,7 +64,7 @@ struct qaic_manage_trans_hdr { /** * struct qaic_manage_trans_passthrough - Defines a passthrough transaction. * @hdr: In. Header to identify this transaction. - * @data: In. Payload of this ransaction. Opaque to the driver. Userspace must + * @data: In. Payload of this transaction. Opaque to the driver. Userspace must * encode in little endian and align/pad to 64-bit. */ struct qaic_manage_trans_passthrough { diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 2376c73abca1..dbbc404d2b3d 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -43,6 +43,7 @@ extern "C" { #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b #define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -61,6 +62,8 @@ extern "C" { #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -766,6 +769,21 @@ struct drm_v3d_perfmon_get_counter { __u8 reserved[7]; }; +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 + +/** + * struct drm_v3d_perfmon_set_global - ioctl to define a global performance + * monitor + * + * The global performance monitor will be used for all jobs. If a global + * performance monitor is defined, jobs with a self-defined performance + * monitor won't be allowed. + */ +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4a8a4a63e99c..f62689ca861a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1486,6 +1486,8 @@ struct drm_xe_oa_unit { __u64 capabilities; #define DRM_XE_OA_CAPS_BASE (1 << 0) #define DRM_XE_OA_CAPS_SYNCS (1 << 1) +#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) +#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) /** @oa_timestamp_freq: OA timestamp freq */ __u64 oa_timestamp_freq; @@ -1651,6 +1653,20 @@ enum drm_xe_oa_property_id { * to the VM bind case. */ DRM_XE_OA_PROPERTY_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be + * allocated by the driver in bytes. Supported sizes are powers of + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA + * buffer is allocated by default. + */ + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, + + /** + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait + * for before unblocking poll or read + */ + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, }; /** diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h index 24ca0c00cae3..9d9e8ae32b41 100644 --- a/include/uapi/linux/fiemap.h +++ b/include/uapi/linux/fiemap.h @@ -14,37 +14,56 @@ #include <linux/types.h> +/** + * struct fiemap_extent - description of one fiemap extent + * @fe_logical: byte offset of the extent in the file + * @fe_physical: byte offset of extent on disk + * @fe_length: length in bytes for this extent + * @fe_flags: FIEMAP_EXTENT_* flags for this extent + */ struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent from the beginning of the file */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent from the beginning of the disk */ - __u64 fe_length; /* length in bytes for this extent */ + __u64 fe_logical; + __u64 fe_physical; + __u64 fe_length; + /* private: */ __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + /* public: */ + __u32 fe_flags; + /* private: */ __u32 fe_reserved[3]; }; +/** + * struct fiemap - file extent mappings + * @fm_start: byte offset (inclusive) at which to start mapping (in) + * @fm_length: logical length of mapping which userspace wants (in) + * @fm_flags: FIEMAP_FLAG_* flags for request (in/out) + * @fm_mapped_extents: number of extents that were mapped (out) + * @fm_extent_count: size of fm_extents array (in) + * @fm_extents: array of mapped extents (out) + */ struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace wants (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u64 fm_start; + __u64 fm_length; + __u32 fm_flags; + __u32 fm_mapped_extents; + __u32 fm_extent_count; + /* private: */ __u32 fm_reserved; - struct fiemap_extent fm_extents[]; /* array of mapped extents (out) */ + /* public: */ + struct fiemap_extent fm_extents[]; }; #define FIEMAP_MAX_OFFSET (~0ULL) +/* flags used in fm_flags: */ #define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ #define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ #define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */ #define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) +/* flags used in fe_flags: */ #define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ #define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ #define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 753971770733..2bbe00cf1248 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* flags for integrity meta */ +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ + +#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ + IO_INTEGRITY_CHK_REFTAG | \ + IO_INTEGRITY_CHK_APPTAG) + #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ @@ -332,9 +341,13 @@ typedef int __bitwise __kernel_rwf_t; /* Atomic Write */ #define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index aac9a4f8fa9a..e11c82638527 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -98,6 +98,10 @@ struct io_uring_sqe { __u64 addr3; __u64 __pad2[1]; }; + struct { + __u64 attr_ptr; /* pointer to attribute information */ + __u64 attr_type_mask; /* bit mask of attributes */ + }; __u64 optval; /* * If the ring is initialized with IORING_SETUP_SQE128, then @@ -107,6 +111,18 @@ struct io_uring_sqe { }; }; +/* sqe->attr_type_mask flags */ +#define IORING_RW_ATTR_FLAG_PI (1U << 0) +/* PI attribute information */ +struct io_uring_attr_pi { + __u16 flags; + __u16 app_tag; + __u32 len; + __u64 addr; + __u64 seed; + __u64 rsvd; +}; + /* * If sqe->file_index is set to this for opcodes that instantiate a new * direct descriptor (like openat/openat2/accept), then io_uring will allocate @@ -561,6 +577,7 @@ struct io_uring_params { #define IORING_FEAT_REG_REG_RING (1U << 13) #define IORING_FEAT_RECVSEND_BUNDLE (1U << 14) #define IORING_FEAT_MIN_TIMEOUT (1U << 15) +#define IORING_FEAT_RW_ATTR (1U << 16) /* * io_uring_register(2) opcodes and arguments diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 5a43c23f53bf..ff47b6f0ba0f 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -233,7 +233,7 @@ struct mdp_superblock_1 { char set_name[32]; /* set and interpreted by user-space */ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* 0,1,4,5 */ + __le32 level; /* 0,1,4,5, -1 (linear) */ __le32 layout; /* only for raid5 and raid10 currently */ __le64 size; /* used size of component devices, in 512byte sectors */ diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h index 7be89a4906e7..a893010735fb 100644 --- a/include/uapi/linux/raid/md_u.h +++ b/include/uapi/linux/raid/md_u.h @@ -103,6 +103,8 @@ typedef struct mdu_array_info_s { } mdu_array_info_t; +#define LEVEL_LINEAR (-1) + /* we need a value for 'no level specified' and 0 * means 'raid0', so we need something else. This is * for internal use only diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/include/vdso/page.h b/include/vdso/page.h index 710ae2414e68..bc47186c07fc 100644 --- a/include/vdso/page.h +++ b/include/vdso/page.h @@ -8,7 +8,7 @@ * PAGE_SHIFT determines the page size. * * Note: This definition is required because PAGE_SHIFT is used - * in several places throuout the codebase. + * in several places throughout the codebase. */ #define PAGE_SHIFT CONFIG_PAGE_SHIFT |