diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 09:19:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-25 09:19:23 -0800 |
commit | 7c3dc440b1f5c75f45e24430f913e561dc82a419 (patch) | |
tree | 7c3e037691d712a20f7e95279cc1e090328d5d44 /drivers/cxl/cxlmem.h | |
parent | d8e473182ab9e85708067be81d20424045d939fa (diff) | |
parent | e686c32590f40bffc45f105c04c836ffad3e531a (diff) |
Merge tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull Compute Express Link (CXL) updates from Dan Williams:
"To date Linux has been dependent on platform-firmware to map CXL RAM
regions and handle events / errors from devices. With this update we
can now parse / update the CXL memory layout, and report events /
errors from devices. This is a precursor for the CXL subsystem to
handle the end-to-end "RAS" flow for CXL memory. i.e. the flow that
for DDR-attached-DRAM is handled by the EDAC driver where it maps
system physical address events to a field-replaceable-unit (FRU /
endpoint device). In general, CXL has the potential to standardize
what has historically been a pile of memory-controller-specific error
handling logic.
Another change of note is the default policy for handling RAM-backed
device-dax instances. Previously the default access mode was "device",
mmap(2) a device special file to access memory. The new default is
"kmem" where the address range is assigned to the core-mm via
add_memory_driver_managed(). This saves typical users from wondering
why their platform memory is not visible via free(1) and stuck behind
a device-file. At the same time it allows expert users to deploy
policy to, for example, get dedicated access to high performance
memory, or hide low performance memory from general purpose kernel
allocations. This affects not only CXL, but also systems with
high-bandwidth-memory that platform-firmware tags with the
EFI_MEMORY_SP (special purpose) designation.
Summary:
- CXL RAM region enumeration: instantiate 'struct cxl_region' objects
for platform firmware created memory regions
- CXL RAM region provisioning: complement the existing PMEM region
creation support with RAM region support
- "Soft Reservation" policy change: Online (memory hot-add)
soft-reserved memory (EFI_MEMORY_SP) by default, but still allow
for setting aside such memory for dedicated access via device-dax.
- CXL Events and Interrupts: Takeover CXL event handling from
platform-firmware (ACPI calls this CXL Memory Error Reporting) and
export CXL Events via Linux Trace Events.
- Convey CXL _OSC results to drivers: Similar to PCI, let the CXL
subsystem interrogate the result of CXL _OSC negotiation.
- Emulate CXL DVSEC Range Registers as "decoders": Allow for
first-generation devices that pre-date the definition of the CXL
HDM Decoder Capability to translate the CXL DVSEC Range Registers
into 'struct cxl_decoder' objects.
- Set timestamp: Per spec, set the device timestamp in case of
hotplug, or if platform-firwmare failed to set it.
- General fixups: linux-next build issues, non-urgent fixes for
pre-production hardware, unit test fixes, spelling and debug
message improvements"
* tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (66 commits)
dax/kmem: Fix leak of memory-hotplug resources
cxl/mem: Add kdoc param for event log driver state
cxl/trace: Add serial number to trace points
cxl/trace: Add host output to trace points
cxl/trace: Standardize device information output
cxl/pci: Remove locked check for dvsec_range_allowed()
cxl/hdm: Add emulation when HDM decoders are not committed
cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders
cxl/hdm: Emulate HDM decoder from DVSEC range registers
cxl/pci: Refactor cxl_hdm_decode_init()
cxl/port: Export cxl_dvsec_rr_decode() to cxl_port
cxl/pci: Break out range register decoding from cxl_hdm_decode_init()
cxl: add RAS status unmasking for CXL
cxl: remove unnecessary calling of pci_enable_pcie_error_reporting()
dax/hmem: build hmem device support as module if possible
dax: cxl: add CXL_REGION dependency
cxl: avoid returning uninitialized error code
cxl/pmem: Fix nvdimm registration races
cxl/mem: Fix UAPI command comment
cxl/uapi: Tag commands from cxl_query_cmd()
...
Diffstat (limited to 'drivers/cxl/cxlmem.h')
-rw-r--r-- | drivers/cxl/cxlmem.h | 186 |
1 files changed, 177 insertions, 9 deletions
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 6749f2afb1b7..090acebba4fa 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -4,6 +4,7 @@ #define __CXL_MEM_H__ #include <uapi/linux/cxl_mem.h> #include <linux/cdev.h> +#include <linux/uuid.h> #include "cxl.h" /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ @@ -38,6 +39,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @id: id number of this memdev instance. + * @depth: endpoint port depth */ struct cxl_memdev { struct device dev; @@ -47,6 +49,7 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; int id; + int depth; }; static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) @@ -79,6 +82,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) } struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, + resource_size_t base, resource_size_t len, + resource_size_t skipped); static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, struct cxl_memdev *cxlmd) @@ -182,15 +188,31 @@ static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) #define CXL_CAPACITY_MULTIPLIER SZ_256M /** - * struct cxl_endpoint_dvsec_info - Cached DVSEC info - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE - * @ranges: Number of active HDM ranges this device uses. - * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE + * Event Interrupt Policy + * + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52 + */ +enum cxl_event_int_mode { + CXL_INT_NONE = 0x00, + CXL_INT_MSI_MSIX = 0x01, + CXL_INT_FW = 0x02 +}; +struct cxl_event_interrupt_policy { + u8 info_settings; + u8 warn_settings; + u8 failure_settings; + u8 fatal_settings; +} __packed; + +/** + * struct cxl_event_state - Event log driver state + * + * @event_buf: Buffer to receive event data + * @event_log_lock: Serialize event_buf and log use */ -struct cxl_endpoint_dvsec_info { - bool mem_enabled; - int ranges; - struct range dvsec_range[2]; +struct cxl_event_state { + struct cxl_get_event_payload *buf; + struct mutex log_lock; }; /** @@ -228,6 +250,7 @@ struct cxl_endpoint_dvsec_info { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @doe_mbs: PCI DOE mailbox array + * @event: event log driver state * @mbox_send: @dev specific transport for transmitting mailbox commands * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for @@ -266,14 +289,21 @@ struct cxl_dev_state { struct xarray doe_mbs; + struct cxl_event_state event; + int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); }; enum cxl_opcode { CXL_MBOX_OP_INVALID = 0x0000, CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID, + CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100, + CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101, + CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102, + CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103, CXL_MBOX_OP_GET_FW_INFO = 0x0200, CXL_MBOX_OP_ACTIVATE_FW = 0x0202, + CXL_MBOX_OP_SET_TIMESTAMP = 0x0301, CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400, CXL_MBOX_OP_GET_LOG = 0x0401, CXL_MBOX_OP_IDENTIFY = 0x4000, @@ -347,6 +377,136 @@ struct cxl_mbox_identify { u8 qos_telemetry_caps; } __packed; +/* + * Common Event Record Format + * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 + */ +struct cxl_event_record_hdr { + uuid_t id; + u8 length; + u8 flags[3]; + __le16 handle; + __le16 related_handle; + __le64 timestamp; + u8 maint_op_class; + u8 reserved[15]; +} __packed; + +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 +struct cxl_event_record_raw { + struct cxl_event_record_hdr hdr; + u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; +} __packed; + +/* + * Get Event Records output payload + * CXL rev 3.0 section 8.2.9.2.2; Table 8-50 + */ +#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0) +#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1) +struct cxl_get_event_payload { + u8 flags; + u8 reserved1; + __le16 overflow_err_count; + __le64 first_overflow_timestamp; + __le64 last_overflow_timestamp; + __le16 record_count; + u8 reserved2[10]; + struct cxl_event_record_raw records[]; +} __packed; + +/* + * CXL rev 3.0 section 8.2.9.2.2; Table 8-49 + */ +enum cxl_event_log_type { + CXL_EVENT_TYPE_INFO = 0x00, + CXL_EVENT_TYPE_WARN, + CXL_EVENT_TYPE_FAIL, + CXL_EVENT_TYPE_FATAL, + CXL_EVENT_TYPE_MAX +}; + +/* + * Clear Event Records input payload + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51 + */ +struct cxl_mbox_clear_event_payload { + u8 event_log; /* enum cxl_event_log_type */ + u8 clear_flags; + u8 nr_recs; + u8 reserved[3]; + __le16 handles[]; +} __packed; +#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX + +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 +struct cxl_event_gen_media { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 device[3]; + u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; + u8 reserved[46]; +} __packed; + +/* + * DRAM Event Record - DER + * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 + */ +#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 +struct cxl_event_dram { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + u8 validity_flags[2]; + u8 channel; + u8 rank; + u8 nibble_mask[3]; + u8 bank_group; + u8 bank; + u8 row[3]; + u8 column[2]; + u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; + u8 reserved[0x17]; +} __packed; + +/* + * Get Health Info Record + * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 + */ +struct cxl_get_health_info { + u8 health_status; + u8 media_status; + u8 add_status; + u8 life_used; + u8 device_temp[2]; + u8 dirty_shutdown_cnt[4]; + u8 cor_vol_err_cnt[4]; + u8 cor_per_err_cnt[4]; +} __packed; + +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +struct cxl_event_mem_module { + struct cxl_event_record_hdr hdr; + u8 event_type; + struct cxl_get_health_info info; + u8 reserved[0x3d]; +} __packed; + struct cxl_mbox_get_partition_info { __le64 active_volatile_cap; __le64 active_persistent_cap; @@ -372,6 +532,12 @@ struct cxl_mbox_set_partition_info { #define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) +/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */ +struct cxl_mbox_set_timestamp_in { + __le64 timestamp; + +} __packed; + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI @@ -393,7 +559,6 @@ struct cxl_mem_command { struct cxl_command_info info; enum cxl_opcode opcode; u32 flags; -#define CXL_CMD_FLAG_NONE 0 #define CXL_CMD_FLAG_FORCE_ENABLE BIT(0) }; @@ -441,6 +606,9 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status); +int cxl_set_timestamp(struct cxl_dev_state *cxlds); + #ifdef CONFIG_CXL_SUSPEND void cxl_mem_active_inc(void); void cxl_mem_active_dec(void); |