diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mce/amd.c')
| -rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c | 1083 |
1 files changed, 406 insertions, 677 deletions
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 08831acc1d03..3f1dda355307 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -4,8 +4,6 @@ * * Written by Jacob Shin - AMD, Inc. * Maintained by: Borislav Petkov <bp@alien8.de> - * - * All MC4_MISCi registers are shared between cores on a node. */ #include <linux/interrupt.h> #include <linux/notifier.h> @@ -20,7 +18,6 @@ #include <linux/smp.h> #include <linux/string.h> -#include <asm/amd_nb.h> #include <asm/traps.h> #include <asm/apic.h> #include <asm/mce.h> @@ -46,9 +43,6 @@ /* Deferred error settings */ #define MSR_CU_DEF_ERR 0xC0000410 #define MASK_DEF_LVTOFF 0x000000F0 -#define MASK_DEF_INT_TYPE 0x00000006 -#define DEF_LVT_OFF 0x2 -#define DEF_INT_TYPE_APIC 0x2 /* Scalable MCA: */ @@ -57,6 +51,17 @@ static bool thresholding_irq_en; +struct mce_amd_cpu_data { + mce_banks_t thr_intr_banks; + mce_banks_t dfr_intr_banks; + + u32 thr_intr_en: 1, + dfr_intr_en: 1, + __resv: 30; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data); + static const char * const th_names[] = { "load_store", "insn_fetch", @@ -71,35 +76,58 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name { - const char *name; /* Short name for sysfs */ - const char *long_name; /* Long name for pretty-printing */ +#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) + +struct smca_hwid { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ +}; + +struct smca_bank { + const struct smca_hwid *hwid; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ + u64 paddrv :1, /* Physical Address Valid bit in MCA_CONFIG */ + __reserved :63; }; -static struct smca_bank_name smca_names[] = { - [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, - [SMCA_DE] = { "decode_unit", "Decode Unit" }, - [SMCA_RESERVED] = { "reserved", "Reserved" }, - [SMCA_EX] = { "execution_unit", "Execution Unit" }, - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, - [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, +static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); +static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts); + +static const char * const smca_names[] = { + [SMCA_LS ... SMCA_LS_V2] = "load_store", + [SMCA_IF] = "insn_fetch", + [SMCA_L2_CACHE] = "l2_cache", + [SMCA_DE] = "decode_unit", + [SMCA_RESERVED] = "reserved", + [SMCA_EX] = "execution_unit", + [SMCA_FP] = "floating_point", + [SMCA_L3_CACHE] = "l3_cache", + [SMCA_CS ... SMCA_CS_V2] = "coherent_slave", + [SMCA_PIE] = "pie", /* UMC v2 is separate because both of them can exist in a single system. */ - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, - [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, - [SMCA_PB] = { "param_block", "Parameter Block" }, - [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, - [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, - [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, - [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, - [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, - [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, + [SMCA_UMC] = "umc", + [SMCA_UMC_V2] = "umc_v2", + [SMCA_MA_LLC] = "ma_llc", + [SMCA_PB] = "param_block", + [SMCA_PSP ... SMCA_PSP_V2] = "psp", + [SMCA_SMU ... SMCA_SMU_V2] = "smu", + [SMCA_MP5] = "mp5", + [SMCA_MPDMA] = "mpdma", + [SMCA_NBIO] = "nbio", + [SMCA_PCIE ... SMCA_PCIE_V2] = "pcie", + [SMCA_XGMI_PCS] = "xgmi_pcs", + [SMCA_NBIF] = "nbif", + [SMCA_SHUB] = "shub", + [SMCA_SATA] = "sata", + [SMCA_USB] = "usb", + [SMCA_USR_DP] = "usr_dp", + [SMCA_USR_CP] = "usr_cp", + [SMCA_GMI_PCS] = "gmi_pcs", + [SMCA_XGMI_PHY] = "xgmi_phy", + [SMCA_WAFL_PHY] = "wafl_phy", + [SMCA_GMI_PHY] = "gmi_phy", }; static const char *smca_get_name(enum smca_bank_types t) @@ -107,33 +135,25 @@ static const char *smca_get_name(enum smca_bank_types t) if (t >= N_SMCA_BANK_TYPES) return NULL; - return smca_names[t].name; + return smca_names[t]; } -const char *smca_get_long_name(enum smca_bank_types t) -{ - if (t >= N_SMCA_BANK_TYPES) - return NULL; - - return smca_names[t].long_name; -} -EXPORT_SYMBOL_GPL(smca_get_long_name); - -static enum smca_bank_types smca_get_bank_type(unsigned int bank) +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank) { struct smca_bank *b; if (bank >= MAX_NR_BANKS) return N_SMCA_BANK_TYPES; - b = &smca_banks[bank]; + b = &per_cpu(smca_banks, cpu)[bank]; if (!b->hwid) return N_SMCA_BANK_TYPES; return b->hwid->bank_type; } +EXPORT_SYMBOL_GPL(smca_get_bank_type); -static struct smca_hwid smca_hwid_mcatypes[] = { +static const struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype } */ /* Reserved type */ @@ -154,6 +174,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_CS, HWID_MCATYPE(0x2E, 0x0) }, { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) }, { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) }, + { SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) }, /* Unified Memory Controller MCA type */ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, @@ -173,6 +194,9 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* Microprocessor 5 Unit MCA type */ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) }, + /* MPDMA MCA type */ + { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) }, + /* Northbridge IO Unit MCA type */ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) }, @@ -180,19 +204,19 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, - /* xGMI PCS MCA type */ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, - - /* xGMI PHY MCA type */ + { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) }, + { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) }, + { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) }, + { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) }, + { SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) }, + { SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) }, + { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) }, { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, - - /* WAFL PHY MCA type */ { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, + { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) }, }; -struct smca_bank smca_banks[MAX_NR_BANKS]; -EXPORT_SYMBOL_GPL(smca_banks); - /* * In SMCA enabled processors, we can have multiple banks for a given IP type. * So to define a unique name for each bank, we use a temp c-string to append @@ -204,16 +228,40 @@ EXPORT_SYMBOL_GPL(smca_banks); #define MAX_MCATYPE_NAME_LEN 30 static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; +struct threshold_block { + /* This block's number within its bank. */ + unsigned int block; + /* MCA bank number that contains this block. */ + unsigned int bank; + /* CPU which controls this block's MCA bank. */ + unsigned int cpu; + /* MCA_MISC MSR address for this block. */ + u32 address; + /* Enable/Disable APIC interrupt. */ + bool interrupt_enable; + /* Bank can generate an interrupt. */ + bool interrupt_capable; + /* Value upon which threshold interrupt is generated. */ + u16 threshold_limit; + /* sysfs object */ + struct kobject kobj; + /* List of threshold blocks within this block's MCA bank. */ + struct list_head miscj; +}; + +struct threshold_bank { + struct kobject *kobj; + /* List of threshold blocks within this MCA bank. */ + struct list_head miscj; +}; + static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); /* * A list of the banks enabled on each logical CPU. Controls which respective * descriptors to initialize later in mce_threshold_create_device(). */ -static DEFINE_PER_CPU(unsigned int, bank_map); - -/* Map of banks that have more than MCA_MISC0 available. */ -static DEFINE_PER_CPU(u32, smca_misc_banks_map); +static DEFINE_PER_CPU(u64, bank_map); static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -224,32 +272,12 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; -static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) -{ - u32 low, high; - - /* - * For SMCA enabled processors, BLKPTR field of the first MISC register - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). - */ - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return; - - if (!(low & MCI_CONFIG_MCAX)) - return; - - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) - return; - - if (low & MASK_BLKPTR_LO) - per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); - -} - static void smca_configure(unsigned int bank, unsigned int cpu) { + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); + u8 *bank_counts = this_cpu_ptr(smca_bank_counts); + const struct smca_hwid *s_hwid; unsigned int i, hwid_mcatype; - struct smca_hwid *s_hwid; u32 high, low; u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank); @@ -277,17 +305,35 @@ static void smca_configure(unsigned int bank, unsigned int cpu) * APIC based interrupt. First, check that no interrupt has been * set. */ - if ((low & BIT(5)) && !((high >> 5) & 0x3)) + if ((low & BIT(5)) && !((high >> 5) & 0x3) && data->dfr_intr_en) { + __set_bit(bank, data->dfr_intr_banks); high |= BIT(5); + } - wrmsr(smca_config, low, high); - } + /* + * SMCA Corrected Error Interrupt + * + * MCA_CONFIG[IntPresent] is bit 10, and tells us if the bank can + * send an MCA Thresholding interrupt without the OS initializing + * this feature. This can be used if the threshold limit is managed + * by the platform. + * + * MCA_CONFIG[IntEn] is bit 40 (8 in the high portion of the MSR). + * The OS should set this to inform the platform that the OS is ready + * to handle the MCA Thresholding interrupt. + */ + if ((low & BIT(10)) && data->thr_intr_en) { + __set_bit(bank, data->thr_intr_banks); + high |= BIT(8); + } - smca_set_misc_banks_map(bank, cpu); + this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8)); - /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) - return; + if (low & MCI_CONFIG_PADDRV) + this_cpu_ptr(smca_banks)[bank].paddrv = 1; + + wrmsr(smca_config, low, high); + } if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); @@ -299,10 +345,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu) for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { - smca_banks[bank].hwid = s_hwid; - smca_banks[bank].id = low; - smca_banks[bank].sysfs_id = s_hwid->count++; + this_cpu_ptr(smca_banks)[bank].hwid = s_hwid; + this_cpu_ptr(smca_banks)[bank].id = low; + this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++; break; } } @@ -310,25 +357,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; }; -static inline bool is_shared_bank(int bank) -{ - /* - * Scalable MCA provides for only one core to have access to the MSRs of - * a shared bank. - */ - if (mce_flags.smca) - return false; - - /* Bank 4 is for northbridge reporting and is thus shared */ - return (bank == 4); -} - static const char *bank4_names(const struct threshold_block *b) { switch (b->address) { @@ -364,54 +397,54 @@ static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) return msr_high_bits & BIT(28); } -static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) +static bool lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) { int msr = (hi & MASK_LVTOFF_HI) >> 20; + /* + * On SMCA CPUs, LVT offset is programmed at a different MSR, and + * the BIOS provides the value. The original field where LVT offset + * was set is reserved. Return early here: + */ + if (mce_flags.smca) + return false; + if (apic < 0) { pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } if (apic != msr) { - /* - * On SMCA CPUs, LVT offset is programmed at a different MSR, and - * the BIOS provides the value. The original field where LVT offset - * was set is reserved. Return early here: - */ - if (mce_flags.smca) - return 0; - pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d " "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu, apic, b->bank, b->block, b->address, hi, lo); - return 0; + return false; } - return 1; + return true; }; -/* Reprogram MCx_MISC MSR behind this threshold bank. */ -static void threshold_restart_bank(void *_tr) +/* Reprogram MCx_MISC MSR behind this threshold block. */ +static void threshold_restart_block(void *_tr) { struct thresh_restart *tr = _tr; u32 hi, lo; /* sysfs write might race against an offline operation */ - if (this_cpu_read(threshold_banks)) + if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off) return; rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. + */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -443,6 +476,36 @@ static void threshold_restart_bank(void *_tr) wrmsr(tr->b->address, lo, hi); } +static void threshold_restart_bank(unsigned int bank, bool intr_en) +{ + struct threshold_bank **thr_banks = this_cpu_read(threshold_banks); + struct threshold_block *block, *tmp; + struct thresh_restart tr; + + if (!thr_banks || !thr_banks[bank]) + return; + + memset(&tr, 0, sizeof(tr)); + + list_for_each_entry_safe(block, tmp, &thr_banks[bank]->miscj, miscj) { + tr.b = block; + tr.b->interrupt_enable = intr_en; + threshold_restart_block(&tr); + } +} + +/* Try to use the threshold limit reported through APEI. */ +static u16 get_thr_limit(void) +{ + u32 thr_limit = mce_get_apei_thr_limit(); + + /* Fallback to old default if APEI limit is not available. */ + if (!thr_limit) + return THRESHOLD_MAX; + + return min(thr_limit, THRESHOLD_MAX); +} + static void mce_threshold_block_init(struct threshold_block *b, int offset) { struct thresh_restart tr = { @@ -451,8 +514,8 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset) .lvt_off = offset, }; - b->threshold_limit = THRESHOLD_MAX; - threshold_restart_bank(&tr); + b->threshold_limit = get_thr_limit(); + threshold_restart_block(&tr); }; static int setup_APIC_mce_threshold(int reserved, int new) @@ -464,53 +527,6 @@ static int setup_APIC_mce_threshold(int reserved, int new) return reserved; } -static int setup_APIC_deferred_error(int reserved, int new) -{ - if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, - APIC_EILVT_MSG_FIX, 0)) - return new; - - return reserved; -} - -static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) -{ - u32 low = 0, high = 0; - int def_offset = -1, def_new; - - if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) - return; - - def_new = (low & MASK_DEF_LVTOFF) >> 4; - if (!(low & MASK_DEF_LVTOFF)) { - pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); - def_new = DEF_LVT_OFF; - low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); - } - - def_offset = setup_APIC_deferred_error(def_offset, def_new); - if ((def_offset == def_new) && - (deferred_error_int_vector != amd_deferred_error_interrupt)) - deferred_error_int_vector = amd_deferred_error_interrupt; - - if (!mce_flags.smca) - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; - - wrmsr(MSR_CU_DEF_ERR, low, high); -} - -static u32 smca_get_block_address(unsigned int bank, unsigned int block, - unsigned int cpu) -{ - if (!block) - return MSR_AMD64_SMCA_MCx_MISC(bank); - - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) - return 0; - - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); -} - static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block, unsigned int cpu) @@ -520,13 +536,20 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; - if (mce_flags.smca) - return smca_get_block_address(bank, block, cpu); + if (mce_flags.smca) { + if (!block) + return MSR_AMD64_SMCA_MCx_MISC(bank); + + if (!(low & MASK_BLKPTR_LO)) + return 0; + + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + } /* Fall back to method we used for older processors: */ switch (block) { case 0: - addr = msr_ops.misc(bank); + addr = mca_msr_reg(bank, MCA_MISC); break; case 1: offset = ((low & MASK_BLKPTR_LO) >> 21); @@ -539,17 +562,15 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high, return addr; } -static int -prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, - int offset, u32 misc_high) +static int prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, + int offset, u32 misc_high) { unsigned int cpu = smp_processor_id(); - u32 smca_low, smca_high; struct threshold_block b; int new; if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); + per_cpu(bank_map, cpu) |= BIT_ULL(bank); memset(&b, 0, sizeof(b)); b.cpu = cpu; @@ -561,20 +582,13 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, if (!b.interrupt_capable) goto done; + __set_bit(bank, this_cpu_ptr(&mce_amd_data)->thr_intr_banks); b.interrupt_enable = 1; - if (!mce_flags.smca) { - new = (misc_high & MASK_LVTOFF_HI) >> 20; - goto set_offset; - } - - /* Gather LVT offset for thresholding: */ - if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high)) - goto out; - - new = (smca_low & SMCA_THR_LVT_OFF) >> 12; + if (mce_flags.smca) + goto done; -set_offset: + new = (misc_high & MASK_LVTOFF_HI) >> 20; offset = setup_APIC_mce_threshold(offset, new); if (offset == new) thresholding_irq_en = true; @@ -582,13 +596,12 @@ set_offset: done: mce_threshold_block_init(&b, offset); -out: return offset; } bool amd_filter_mce(struct mce *m) { - enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); struct cpuinfo_x86 *c = &boot_cpu_data; /* See Family 17h Models 10h-2Fh Erratum #1114. */ @@ -626,7 +639,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) } else if (c->x86 == 0x17 && (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { - if (smca_get_bank_type(bank) != SMCA_IF) + if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF) return; msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); @@ -635,12 +648,12 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) return; } - rdmsrl(MSR_K7_HWCR, hwcr); + rdmsrq(MSR_K7_HWCR, hwcr); /* McStatusWrEn has to be set */ need_toggle = !(hwcr & BIT(18)); if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + wrmsrq(MSR_K7_HWCR, hwcr | BIT(18)); /* Clear CntP bit safely */ for (i = 0; i < num_msrs; i++) @@ -648,7 +661,55 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) /* restore old settings */ if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); + wrmsrq(MSR_K7_HWCR, hwcr); +} + +static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { + /* + * disable GART TBL walk error reporting, which + * trips off incorrectly with the IOMMU & 3ware + * & Cerberus: + */ + clear_bit(10, (unsigned long *)&mce_banks[4].ctl); + } + + /* + * Various K7s with broken bank 0 around. Always disable + * by default. + */ + if (c->x86 == 6 && this_cpu_read(mce_num_banks)) + mce_banks[0].ctl = 0; +} + +/* + * Enable the APIC LVT interrupt vectors once per-CPU. This should be done before hardware is + * ready to send interrupts. + * + * Individual error sources are enabled later during per-bank init. + */ +static void smca_enable_interrupt_vectors(void) +{ + struct mce_amd_cpu_data *data = this_cpu_ptr(&mce_amd_data); + u64 mca_intr_cfg, offset; + + if (!mce_flags.smca || !mce_flags.succor) + return; + + if (rdmsrq_safe(MSR_CU_DEF_ERR, &mca_intr_cfg)) + return; + + offset = (mca_intr_cfg & SMCA_THR_LVT_OFF) >> 12; + if (!setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->thr_intr_en = 1; + + offset = (mca_intr_cfg & MASK_DEF_LVTOFF) >> 4; + if (!setup_APIC_eilvt(offset, DEFERRED_ERROR_VECTOR, APIC_EILVT_MSG_FIX, 0)) + data->dfr_intr_en = 1; } /* cpu init entry point, called from mce.c with preempt off */ @@ -658,11 +719,20 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) u32 low = 0, high = 0, address = 0; int offset = -1; + amd_apply_cpu_quirks(c); + + mce_flags.amd_threshold = 1; + + smca_enable_interrupt_vectors(); for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (mce_flags.smca) + if (mce_flags.smca) { smca_configure(bank, cpu); + if (!this_cpu_ptr(&mce_amd_data)->thr_intr_en) + continue; + } + disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { @@ -683,255 +753,88 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) offset = prepare_threshold_block(bank, block, address, offset, high); } } - - if (mce_flags.succor) - deferred_error_interrupt_enable(c); } -int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +void smca_bsp_init(void) { - u64 dram_base_addr, dram_limit_addr, dram_hole_base; - /* We start from the normalized address */ - u64 ret_addr = norm_addr; - - u32 tmp; - - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; - u8 intlv_addr_sel, intlv_addr_bit; - u8 num_intlv_bits, hashed_bit; - u8 lgcy_mmio_hole_en, base = 0; - u8 cs_mask, cs_id = 0; - bool hash_enabled = false; - - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) - goto out_err; - - /* Remove HiAddrOffset from normalized address, if enabled: */ - if (tmp & BIT(0)) { - u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; - - if (norm_addr >= hi_addr_offset) { - ret_addr -= hi_addr_offset; - base = 1; - } - } - - /* Read D18F0x110 (DramBaseAddress). */ - if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) - goto out_err; - - /* Check if address range is valid. */ - if (!(tmp & BIT(0))) { - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, tmp); - goto out_err; - } - - lgcy_mmio_hole_en = tmp & BIT(1); - intlv_num_chan = (tmp >> 4) & 0xF; - intlv_addr_sel = (tmp >> 8) & 0x7; - dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; - - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ - if (intlv_addr_sel > 3) { - pr_err("%s: Invalid interleave address select %d.\n", - __func__, intlv_addr_sel); - goto out_err; - } - - /* Read D18F0x114 (DramLimitAddress). */ - if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) - goto out_err; - - intlv_num_sockets = (tmp >> 8) & 0x1; - intlv_num_dies = (tmp >> 10) & 0x3; - dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); - - intlv_addr_bit = intlv_addr_sel + 8; - - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ - switch (intlv_num_chan) { - case 0: intlv_num_chan = 0; break; - case 1: intlv_num_chan = 1; break; - case 3: intlv_num_chan = 2; break; - case 5: intlv_num_chan = 3; break; - case 7: intlv_num_chan = 4; break; - - case 8: intlv_num_chan = 1; - hash_enabled = true; - break; - default: - pr_err("%s: Invalid number of interleaved channels %d.\n", - __func__, intlv_num_chan); - goto out_err; - } - - num_intlv_bits = intlv_num_chan; - - if (intlv_num_dies > 2) { - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", - __func__, intlv_num_dies); - goto out_err; - } - - num_intlv_bits += intlv_num_dies; - - /* Add a bit if sockets are interleaved. */ - num_intlv_bits += intlv_num_sockets; - - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { - pr_err("%s: Invalid interleave bits %d.\n", - __func__, num_intlv_bits); - goto out_err; - } - - if (num_intlv_bits > 0) { - u64 temp_addr_x, temp_addr_i, temp_addr_y; - u8 die_id_bit, sock_id_bit, cs_fabric_id; - - /* - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. - * This is the fabric id for this coherent slave. Use - * umc/channel# as instance id of the coherent slave - * for FICAA. - */ - if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) - goto out_err; - - cs_fabric_id = (tmp >> 8) & 0xFF; - die_id_bit = 0; - - /* If interleaved over more than 1 channel: */ - if (intlv_num_chan) { - die_id_bit = intlv_num_chan; - cs_mask = (1 << die_id_bit) - 1; - cs_id = cs_fabric_id & cs_mask; - } - - sock_id_bit = die_id_bit; - - /* Read D18F1x208 (SystemFabricIdMask). */ - if (intlv_num_dies || intlv_num_sockets) - if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) - goto out_err; - - /* If interleaved over more than 1 die. */ - if (intlv_num_dies) { - sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (tmp >> 24) & 0xF; - die_id_mask = (tmp >> 8) & 0xFF; - - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; - } - - /* If interleaved over more than 1 socket. */ - if (intlv_num_sockets) { - socket_id_shift = (tmp >> 28) & 0xF; - socket_id_mask = (tmp >> 16) & 0xFF; - - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; - } - - /* - * The pre-interleaved address consists of XXXXXXIIIYYYYY - * where III is the ID for this CS, and XXXXXXYYYYY are the - * address bits from the post-interleaved address. - * "num_intlv_bits" has been calculated to tell us how many "I" - * bits there are. "intlv_addr_bit" tells us how many "Y" bits - * there are (where "I" starts). - */ - temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); - temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; - } - - /* Add dram base address */ - ret_addr += dram_base_addr; - - /* If legacy MMIO hole enabled */ - if (lgcy_mmio_hole_en) { - if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) - goto out_err; - - dram_hole_base = tmp & GENMASK(31, 24); - if (ret_addr >= dram_hole_base) - ret_addr += (BIT_ULL(32) - dram_hole_base); - } - - if (hash_enabled) { - /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ret_addr >> 12) ^ - (ret_addr >> 18) ^ - (ret_addr >> 21) ^ - (ret_addr >> 30) ^ - cs_id; + mce_threshold_vector = amd_threshold_interrupt; + deferred_error_int_vector = amd_deferred_error_interrupt; +} - hashed_bit &= BIT(0); +/* + * DRAM ECC errors are reported in the Northbridge (bank 4) with + * Extended Error Code 8. + */ +static bool legacy_mce_is_memory_error(struct mce *m) +{ + return m->bank == 4 && XEC(m->status, 0x1f) == 8; +} - if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) - ret_addr ^= BIT(intlv_addr_bit); - } +/* + * DRAM ECC errors are reported in Unified Memory Controllers with + * Extended Error Code 0. + */ +static bool smca_mce_is_memory_error(struct mce *m) +{ + enum smca_bank_types bank_type; - /* Is calculated system address is above DRAM limit address? */ - if (ret_addr > dram_limit_addr) - goto out_err; + if (XEC(m->status, 0x3f)) + return false; - *sys_addr = ret_addr; - return 0; + bank_type = smca_get_bank_type(m->extcpu, m->bank); -out_err: - return -EINVAL; + return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2; } -EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); bool amd_mce_is_memory_error(struct mce *m) { - /* ErrCodeExt[20:16] */ - u8 xec = (m->status >> 16) & 0x1f; - if (mce_flags.smca) - return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0; - - return m->bank == 4 && xec == 0x8; + return smca_mce_is_memory_error(m); + else + return legacy_mce_is_memory_error(m); } -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) +/* + * Some AMD systems have an explicit indicator that the value in MCA_ADDR is a + * system physical address. Individual cases though, need to be detected for + * other systems. Future cases will be added as needed. + * + * 1) General case + * a) Assume address is not usable. + * 2) Poison errors + * a) Indicated by MCA_STATUS[43]: poison. Defined for all banks except legacy + * northbridge (bank 4). + * b) Refers to poison consumption in the core. Does not include "no action", + * "action optional", or "deferred" error severities. + * c) Will include a usable address so that immediate action can be taken. + * 3) Northbridge DRAM ECC errors + * a) Reported in legacy bank 4 with extended error code (XEC) 8. + * b) MCA_STATUS[43] is *not* defined as poison in legacy bank 4. Therefore, + * this bit should not be checked. + * 4) MCI_STATUS_PADDRVAL is set + * a) Will provide a valid system physical address. + * + * NOTE: SMCA UMC memory errors fall into case #1. + */ +bool amd_mce_usable_address(struct mce *m) { - struct mce m; - - mce_setup(&m); - - m.status = status; - m.misc = misc; - m.bank = bank; - m.tsc = rdtsc(); - - if (m.status & MCI_STATUS_ADDRV) { - m.addr = addr; - - /* - * Extract [55:<lsb>] where lsb is the least significant - * *valid* bit of the address bits. - */ - if (mce_flags.smca) { - u8 lsb = (m.addr >> 56) & 0x3f; - - m.addr &= GENMASK_ULL(55, lsb); - } + /* Check special northbridge case 3) first. */ + if (!mce_flags.smca) { + if (legacy_mce_is_memory_error(m)) + return true; + else if (m->bank == 4) + return false; } - if (mce_flags.smca) { - rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); + if (this_cpu_ptr(smca_banks)[m->bank].paddrv) + return m->status & MCI_STATUS_PADDRV; - if (m.status & MCI_STATUS_SYNDV) - rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); - } + /* Check poison bit for all other bank types. */ + if (m->status & MCI_STATUS_POISON) + return true; - mce_log(&m); + /* Assume address is not usable for all others. */ + return false; } DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) @@ -940,99 +843,23 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error) inc_irq_stat(irq_deferred_error_count); deferred_error_int_vector(); trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); - ack_APIC_irq(); -} - -/* - * Returns true if the logged error is deferred. False, otherwise. - */ -static inline bool -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc) -{ - u64 status, addr = 0; - - rdmsrl(msr_stat, status); - if (!(status & MCI_STATUS_VAL)) - return false; - - if (status & MCI_STATUS_ADDRV) - rdmsrl(msr_addr, addr); - - __log_error(bank, status, addr, misc); - - wrmsrl(msr_stat, 0); - - return status & MCI_STATUS_DEFERRED; -} - -/* - * We have three scenarios for checking for Deferred errors: - * - * 1) Non-SMCA systems check MCA_STATUS and log error if found. - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also - * clear MCA_DESTAT. - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and - * log it. - */ -static void log_error_deferred(unsigned int bank) -{ - bool defrd; - - defrd = _log_error_bank(bank, msr_ops.status(bank), - msr_ops.addr(bank), 0); - - if (!mce_flags.smca) - return; - - /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */ - if (defrd) { - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0); - return; - } - - /* - * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check - * for a valid error. - */ - _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank), - MSR_AMD64_SMCA_MCx_DEADDR(bank), 0); + apic_eoi(); } /* APIC interrupt handler for deferred errors */ static void amd_deferred_error_interrupt(void) { - unsigned int bank; - - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) - log_error_deferred(bank); + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks); } -static void log_error_thresholding(unsigned int bank, u64 misc) +void mce_amd_handle_storm(unsigned int bank, bool on) { - _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc); + threshold_restart_bank(bank, on); } -static void log_and_reset_block(struct threshold_block *block) +static void amd_reset_thr_limit(unsigned int bank) { - struct thresh_restart tr; - u32 low = 0, high = 0; - - if (!block) - return; - - if (rdmsr_safe(block->address, &low, &high)) - return; - - if (!(high & MASK_OVERFLOW_HI)) - return; - - /* Log the MCE which caused the threshold event. */ - log_error_thresholding(block->bank, ((u64)high << 32) | low); - - /* Reset threshold block after logging error. */ - memset(&tr, 0, sizeof(tr)); - tr.b = block; - threshold_restart_bank(&tr); + threshold_restart_bank(bank, true); } /* @@ -1041,34 +868,22 @@ static void log_and_reset_block(struct threshold_block *block) */ static void amd_threshold_interrupt(void) { - struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; - struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, cpu = smp_processor_id(); + machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->thr_intr_banks); +} - /* - * Validate that the threshold bank has been initialized already. The - * handler is installed at boot time, but on a hotplug event the - * interrupt might fire before the data has been initialized. - */ - if (!bp) - return; +void amd_clear_bank(struct mce *m) +{ + amd_reset_thr_limit(m->bank); - for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) - continue; + /* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */ + if (m->status & MCI_STATUS_DEFERRED) + mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0); - first_block = bp[bank]->blocks; - if (!first_block) - continue; + /* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */ + if (m->kflags & MCE_CHECK_DFR_REGS) + return; - /* - * The first block is also the head of the list. Check it first - * before iterating over the rest. - */ - log_and_reset_block(first_block); - list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj) - log_and_reset_block(block); - } + mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0); } /* @@ -1106,7 +921,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) memset(&tr, 0, sizeof(tr)); tr.b = b; - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) return -ENODEV; return size; @@ -1131,7 +946,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) b->threshold_limit = new; tr.b = b; - if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1)) + if (smp_call_function_single(b->cpu, threshold_restart_block, &tr, 1)) return -ENODEV; return size; @@ -1170,6 +985,7 @@ static struct attribute *default_attrs[] = { NULL, /* possibly interrupt_enable if supported, see below */ NULL, }; +ATTRIBUTE_GROUPS(default); #define to_block(k) container_of(k, struct threshold_block, kobj) #define to_attr(a) container_of(a, struct threshold_attr, attr) @@ -1204,13 +1020,13 @@ static const struct sysfs_ops threshold_ops = { static void threshold_block_release(struct kobject *kobj); -static struct kobj_type threshold_ktype = { +static const struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, + .default_groups = default_groups, .release = threshold_block_release, }; -static const char *get_name(unsigned int bank, struct threshold_block *b) +static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b) { enum smca_bank_types bank_type; @@ -1221,22 +1037,29 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - bank_type = smca_get_bank_type(bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; + bank_type = smca_get_bank_type(cpu, bank); - if (b && bank_type == SMCA_UMC) { + if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; } - if (smca_banks[bank].hwid->count == 1) + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; + } + + if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) return smca_get_name(bank_type); snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_get_name(bank_type), - smca_banks[bank].sysfs_id); + "%s_%u", smca_get_name(bank_type), + per_cpu(smca_banks, cpu)[bank].sysfs_id); return buf_mcatype; } @@ -1275,24 +1098,20 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->address = address; b->interrupt_enable = 0; b->interrupt_capable = lvt_interrupt_supported(bank, high); - b->threshold_limit = THRESHOLD_MAX; + b->threshold_limit = get_thr_limit(); if (b->interrupt_capable) { - threshold_ktype.default_attrs[2] = &interrupt_enable.attr; + default_attrs[2] = &interrupt_enable.attr; b->interrupt_enable = 1; } else { - threshold_ktype.default_attrs[2] = NULL; + default_attrs[2] = NULL; } - INIT_LIST_HEAD(&b->miscj); + list_add(&b->miscj, &tb->miscj); - /* This is safe as @tb is not visible yet */ - if (tb->blocks) - list_add(&b->miscj, &tb->blocks->miscj); - else - tb->blocks = b; + mce_threshold_block_init(b, (high & MASK_LVTOFF_HI) >> 20); - err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) goto out_free; recurse: @@ -1317,62 +1136,17 @@ out_free: return err; } -static int __threshold_add_blocks(struct threshold_bank *b) -{ - struct list_head *head = &b->blocks->miscj; - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - int err = 0; - - err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); - if (err) - return err; - - list_for_each_entry_safe(pos, tmp, head, miscj) { - - err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); - if (err) { - list_for_each_entry_safe_reverse(pos, tmp, head, miscj) - kobject_del(&pos->kobj); - - return err; - } - } - return err; -} - static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, unsigned int bank) { struct device *dev = this_cpu_read(mce_device); - struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - const char *name = get_name(bank, NULL); + const char *name = get_name(cpu, bank, NULL); int err = 0; if (!dev) return -ENODEV; - if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_die_id(cpu)); - - /* threshold descriptor already initialized on this node? */ - if (nb && nb->bank4) { - /* yes, use it */ - b = nb->bank4; - err = kobject_add(b->kobj, &dev->kobj, name); - if (err) - goto out; - - bp[bank] = b; - refcount_inc(&b->cpus); - - err = __threshold_add_blocks(b); - - goto out; - } - } - b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { err = -ENOMEM; @@ -1386,18 +1160,9 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, goto out_free; } - if (is_shared_bank(bank)) { - b->shared = 1; - refcount_set(&b->cpus, 1); - - /* nb is already initialized, see above */ - if (nb) { - WARN_ON(nb->bank4); - nb->bank4 = b; - } - } + INIT_LIST_HEAD(&b->miscj); - err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC)); if (err) goto out_kobj; @@ -1417,66 +1182,39 @@ static void threshold_block_release(struct kobject *kobj) kfree(to_block(kobj)); } -static void deallocate_threshold_blocks(struct threshold_bank *bank) +static void threshold_remove_bank(struct threshold_bank *bank) { struct threshold_block *pos, *tmp; - list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) { + list_for_each_entry_safe(pos, tmp, &bank->miscj, miscj) { list_del(&pos->miscj); kobject_put(&pos->kobj); } - kobject_put(&bank->blocks->kobj); -} - -static void __threshold_remove_blocks(struct threshold_bank *b) -{ - struct threshold_block *pos = NULL; - struct threshold_block *tmp = NULL; - - kobject_del(b->kobj); - - list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) - kobject_del(&pos->kobj); + kobject_put(bank->kobj); + kfree(bank); } -static void threshold_remove_bank(struct threshold_bank *bank) +static void __threshold_remove_device(struct threshold_bank **bp) { - struct amd_northbridge *nb; - - if (!bank->blocks) - goto out_free; + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); - if (!bank->shared) - goto out_dealloc; + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) + continue; - if (!refcount_dec_and_test(&bank->cpus)) { - __threshold_remove_blocks(bank); - return; - } else { - /* - * The last CPU on this node using the shared bank is going - * away, remove that bank now. - */ - nb = node_to_amd_nb(topology_die_id(smp_processor_id())); - nb->bank4 = NULL; + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; } - -out_dealloc: - deallocate_threshold_blocks(bank); - -out_free: - kobject_put(bank->kobj); - kfree(bank); + kfree(bp); } -int mce_threshold_remove_device(unsigned int cpu) +void mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, numbanks = this_cpu_read(mce_num_banks); if (!bp) - return 0; + return; /* * Clear the pointer before cleaning up, so that the interrupt won't @@ -1484,14 +1222,8 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); - for (bank = 0; bank < numbanks; bank++) { - if (bp[bank]) { - threshold_remove_bank(bp[bank]); - bp[bank] = NULL; - } - } - kfree(bp); - return 0; + __threshold_remove_device(bp); + return; } /** @@ -1505,37 +1237,34 @@ int mce_threshold_remove_device(unsigned int cpu) * thread running on @cpu. The callback is invoked on all CPUs which are * online when the callback is installed or during a real hotplug event. */ -int mce_threshold_create_device(unsigned int cpu) +void mce_threshold_create_device(unsigned int cpu) { unsigned int numbanks, bank; struct threshold_bank **bp; - int err; if (!mce_flags.amd_threshold) - return 0; + return; bp = this_cpu_read(threshold_banks); if (bp) - return 0; + return; numbanks = this_cpu_read(mce_num_banks); bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL); if (!bp) - return -ENOMEM; + return; for (bank = 0; bank < numbanks; ++bank) { - if (!(this_cpu_read(bank_map) & (1 << bank))) + if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue; - err = threshold_create_bank(bp, cpu, bank); - if (err) - goto out_err; + if (threshold_create_bank(bp, cpu, bank)) { + __threshold_remove_device(bp); + return; + } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; - return 0; -out_err: - mce_threshold_remove_device(cpu); - return err; + return; } |
