diff options
Diffstat (limited to 'drivers/platform/x86/intel/ifs/runtest.c')
-rw-r--r-- | drivers/platform/x86/intel/ifs/runtest.c | 363 |
1 files changed, 311 insertions, 52 deletions
diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 13ecd55c6668..f978dd05d4d8 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -23,6 +23,19 @@ /* Max retries on the same chunk */ #define MAX_IFS_RETRIES 5 +struct run_params { + struct ifs_data *ifsd; + union ifs_scan *activate; + union ifs_status status; +}; + +struct sbaf_run_params { + struct ifs_data *ifsd; + int *retry_cnt; + union ifs_sbaf *activate; + union ifs_sbaf_status status; +}; + /* * Number of TSC cycles that a logical CPU will wait for the other * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). @@ -63,6 +76,19 @@ static const char * const scan_test_status[] = { static void message_not_tested(struct device *dev, int cpu, union ifs_status status) { + struct ifs_data *ifsd = ifs_get_data(dev); + + /* + * control_error is set when the microcode runs into a problem + * loading the image from the reserved BIOS memory, or it has + * been corrupted. Reloading the image may fix this issue. + */ + if (status.control_error) { + dev_warn(dev, "CPU(s) %*pbl: Scan controller error. Batch: %02x version: 0x%x\n", + cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version); + return; + } + if (status.error_code < ARRAY_SIZE(scan_test_status)) { dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n", cpumask_pr_args(cpu_smt_mask(cpu)), @@ -85,16 +111,6 @@ static void message_fail(struct device *dev, int cpu, union ifs_status status) struct ifs_data *ifsd = ifs_get_data(dev); /* - * control_error is set when the microcode runs into a problem - * loading the image from the reserved BIOS memory, or it has - * been corrupted. Reloading the image may fix this issue. - */ - if (status.control_error) { - dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image. 
Batch: %02x version: 0x%x\n", - cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version); - } - - /* * signature_error is set when the output from the scan chains does not * match the expected signature. This might be a transient problem (e.g. * due to a bit flip from an alpha particle or neutron). If the problem @@ -134,19 +150,57 @@ static bool can_restart(union ifs_status status) return false; } +#define SPINUNIT 100 /* 100 nsec */ +static atomic_t array_cpus_in; +static atomic_t scan_cpus_in; +static atomic_t sbaf_cpus_in; + +/* + * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() + */ +static void wait_for_sibling_cpu(atomic_t *t, long long timeout) +{ + int cpu = smp_processor_id(); + const struct cpumask *smt_mask = cpu_smt_mask(cpu); + int all_cpus = cpumask_weight(smt_mask); + + atomic_inc(t); + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) + return; + ndelay(SPINUNIT); + timeout -= SPINUNIT; + touch_nmi_watchdog(); + } +} + /* * Execute the scan. Called "simultaneously" on all threads of a core * at high priority using the stop_cpus mechanism. */ static int doscan(void *data) { - int cpu = smp_processor_id(); - u64 *msrs = data; + int cpu = smp_processor_id(), start, stop; + struct run_params *params = data; + union ifs_status status; + struct ifs_data *ifsd; int first; + ifsd = params->ifsd; + + if (ifsd->generation) { + start = params->activate->gen2.start; + stop = params->activate->gen2.stop; + } else { + start = params->activate->gen0.start; + stop = params->activate->gen0.stop; + } + /* Only the first logical CPU on a core reports result */ first = cpumask_first(cpu_smt_mask(cpu)); + wait_for_sibling_cpu(&scan_cpus_in, NSEC_PER_SEC); + /* * This WRMSR will wait for other HT threads to also write * to this MSR (at most for activate.delay cycles). 
Then it @@ -155,12 +209,14 @@ static int doscan(void *data) * take up to 200 milliseconds (in the case where all chunks * are processed in a single pass) before it retires. */ - wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]); + wrmsrl(MSR_ACTIVATE_SCAN, params->activate->data); + rdmsrl(MSR_SCAN_STATUS, status.data); - if (cpu == first) { - /* Pass back the result of the scan */ - rdmsrl(MSR_SCAN_STATUS, msrs[1]); - } + trace_ifs_status(ifsd->cur_batch, start, stop, status.data); + + /* Pass back the result of the scan */ + if (cpu == first) + params->status = status; return 0; } @@ -173,13 +229,13 @@ static int doscan(void *data) */ static void ifs_test_core(int cpu, struct device *dev) { + union ifs_status status = {}; union ifs_scan activate; - union ifs_status status; unsigned long timeout; struct ifs_data *ifsd; int to_start, to_stop; int status_chunk; - u64 msrvals[2]; + struct run_params params; int retries; ifsd = ifs_get_data(dev); @@ -190,6 +246,8 @@ static void ifs_test_core(int cpu, struct device *dev) to_start = 0; to_stop = ifsd->valid_chunks - 1; + params.ifsd = ifs_get_data(dev); + if (ifsd->generation) { activate.gen2.start = to_start; activate.gen2.stop = to_stop; @@ -207,12 +265,11 @@ static void ifs_test_core(int cpu, struct device *dev) break; } - msrvals[0] = activate.data; - stop_core_cpuslocked(cpu, doscan, msrvals); - - status.data = msrvals[1]; + params.activate = &activate; + atomic_set(&scan_cpus_in, 0); + stop_core_cpuslocked(cpu, doscan, &params); - trace_ifs_status(cpu, to_start, to_stop, status.data); + status = params.status; /* Some cases can be retried, give up for others */ if (!can_restart(status)) @@ -239,10 +296,10 @@ static void ifs_test_core(int cpu, struct device *dev) /* Update status for this core */ ifsd->scan_details = status.data; - if (status.control_error || status.signature_error) { + if (status.signature_error) { ifsd->status = SCAN_TEST_FAIL; message_fail(dev, cpu, status); - } else if (status.error_code) { + } else if 
(status.control_error || status.error_code) { ifsd->status = SCAN_NOT_TESTED; message_not_tested(dev, cpu, status); } else { @@ -250,34 +307,14 @@ static void ifs_test_core(int cpu, struct device *dev) } } -#define SPINUNIT 100 /* 100 nsec */ -static atomic_t array_cpus_out; - -/* - * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() - */ -static void wait_for_sibling_cpu(atomic_t *t, long long timeout) -{ - int cpu = smp_processor_id(); - const struct cpumask *smt_mask = cpu_smt_mask(cpu); - int all_cpus = cpumask_weight(smt_mask); - - atomic_inc(t); - while (atomic_read(t) < all_cpus) { - if (timeout < SPINUNIT) - return; - ndelay(SPINUNIT); - timeout -= SPINUNIT; - touch_nmi_watchdog(); - } -} - static int do_array_test(void *data) { union ifs_array *command = data; int cpu = smp_processor_id(); int first; + wait_for_sibling_cpu(&array_cpus_in, NSEC_PER_SEC); + /* * Only one logical CPU on a core needs to trigger the Array test via MSR write. */ @@ -289,9 +326,6 @@ static int do_array_test(void *data) rdmsrl(MSR_ARRAY_BIST, command->data); } - /* Tests complete faster if the sibling is spinning here */ - wait_for_sibling_cpu(&array_cpus_out, NSEC_PER_SEC); - return 0; } @@ -312,7 +346,7 @@ static void ifs_array_test_core(int cpu, struct device *dev) timed_out = true; break; } - atomic_set(&array_cpus_out, 0); + atomic_set(&array_cpus_in, 0); stop_core_cpuslocked(cpu, do_array_test, &command); if (command.ctrl_result) @@ -361,6 +395,225 @@ static void ifs_array_test_gen1(int cpu, struct device *dev) ifsd->status = SCAN_TEST_PASS; } +#define SBAF_STATUS_PASS 0 +#define SBAF_STATUS_SIGN_FAIL 1 +#define SBAF_STATUS_INTR 2 +#define SBAF_STATUS_TEST_FAIL 3 + +enum sbaf_status_err_code { + IFS_SBAF_NO_ERROR = 0, + IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN = 1, + IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS = 2, + IFS_SBAF_UNASSIGNED_ERROR_CODE3 = 3, + IFS_SBAF_INVALID_BUNDLE_INDEX = 4, + IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS = 5, + 
IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY = 6, + IFS_SBAF_UNASSIGNED_ERROR_CODE7 = 7, + IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, + IFS_SBAF_INTERRUPTED_DURING_EXECUTION = 9, + IFS_SBAF_INVALID_PROGRAM_INDEX = 0xA, + IFS_SBAF_CORRUPTED_CHUNK = 0xB, + IFS_SBAF_DID_NOT_START = 0xC, +}; + +static const char * const sbaf_test_status[] = { + [IFS_SBAF_NO_ERROR] = "SBAF no error", + [IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", + [IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.", + [IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3", + [IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non-valid sbaf bundles. Reload test image", + [IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", + [IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently", + [IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7", + [IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", + [IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start", + [IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid", + [IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk", + [IFS_SBAF_DID_NOT_START] = "SBAF operation did not start", +}; + +static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data) +{ + union ifs_sbaf_status status = (union ifs_sbaf_status)status_data; + + if (status.error_code < ARRAY_SIZE(sbaf_test_status)) { + dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. 
%s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + sbaf_test_status[status.error_code]); + } else if (status.error_code == IFS_SW_TIMEOUT) { + dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { + dev_info(dev, "CPU(s) %*pbl: %s\n", + cpumask_pr_args(cpu_smt_mask(cpu)), + "Not all SBAF bundles executed. Maximum forward progress retries exceeded"); + } else { + dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n", + cpumask_pr_args(cpu_smt_mask(cpu)), status.data); + } +} + +static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status) +{ + /* Failed signature check is set when SBAF signature did not match the expected value */ + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) { + dev_err(dev, "CPU(s) %*pbl: Failed signature check\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } + + /* Failed to reach end of test */ + if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) { + dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n", + cpumask_pr_args(cpu_smt_mask(cpu))); + } +} + +static bool sbaf_bundle_completed(union ifs_sbaf_status status) +{ + return !(status.sbaf_status || status.error_code); +} + +static bool sbaf_can_restart(union ifs_sbaf_status status) +{ + enum sbaf_status_err_code err_code = status.error_code; + + /* Signature for chunk is bad, or scan test failed */ + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || + status.sbaf_status == SBAF_STATUS_TEST_FAIL) + return false; + + switch (err_code) { + case IFS_SBAF_NO_ERROR: + case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN: + case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS: + case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT: + case IFS_SBAF_INTERRUPTED_DURING_EXECUTION: + return true; + case IFS_SBAF_UNASSIGNED_ERROR_CODE3: + case IFS_SBAF_INVALID_BUNDLE_INDEX: + case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS: + case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY: + case 
IFS_SBAF_UNASSIGNED_ERROR_CODE7: + case IFS_SBAF_INVALID_PROGRAM_INDEX: + case IFS_SBAF_CORRUPTED_CHUNK: + case IFS_SBAF_DID_NOT_START: + break; + } + return false; +} + +/* + * Execute the SBAF test. Called "simultaneously" on all threads of a core + * at high priority using the stop_cpus mechanism. + */ +static int dosbaf(void *data) +{ + struct sbaf_run_params *run_params = data; + int cpu = smp_processor_id(); + union ifs_sbaf_status status; + struct ifs_data *ifsd; + int first; + + ifsd = run_params->ifsd; + + /* Only the first logical CPU on a core reports result */ + first = cpumask_first(cpu_smt_mask(cpu)); + wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC); + + /* + * This WRMSR will wait for other HT threads to also write + * to this MSR (at most for activate.delay cycles). Then it + * starts scan of each requested bundle. The core test happens + * during the "execution" of the WRMSR. + */ + wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data); + rdmsrl(MSR_SBAF_STATUS, status.data); + trace_ifs_sbaf(ifsd->cur_batch, *run_params->activate, status); + + /* Pass back the result of the test */ + if (cpu == first) + run_params->status = status; + + return 0; +} + +static void ifs_sbaf_test_core(int cpu, struct device *dev) +{ + struct sbaf_run_params run_params; + union ifs_sbaf_status status = {}; + union ifs_sbaf activate; + unsigned long timeout; + struct ifs_data *ifsd; + int stop_bundle; + int retries; + + ifsd = ifs_get_data(dev); + + activate.data = 0; + activate.delay = IFS_THREAD_WAIT; + + timeout = jiffies + 2 * HZ; + retries = MAX_IFS_RETRIES; + activate.bundle_idx = 0; + stop_bundle = ifsd->max_bundle; + + while (activate.bundle_idx <= stop_bundle) { + if (time_after(jiffies, timeout)) { + status.error_code = IFS_SW_TIMEOUT; + break; + } + + atomic_set(&sbaf_cpus_in, 0); + + run_params.ifsd = ifsd; + run_params.activate = &activate; + run_params.retry_cnt = &retries; + stop_core_cpuslocked(cpu, dosbaf, &run_params); + + status = 
run_params.status; + + if (sbaf_bundle_completed(status)) { + activate.bundle_idx = status.bundle_idx + 1; + activate.pgm_idx = 0; + retries = MAX_IFS_RETRIES; + continue; + } + + /* Some cases can be retried, give up for others */ + if (!sbaf_can_restart(status)) + break; + + if (status.pgm_idx == activate.pgm_idx) { + /* If no progress retry */ + if (--retries == 0) { + if (status.error_code == IFS_NO_ERROR) + status.error_code = IFS_SW_PARTIAL_COMPLETION; + break; + } + } else { + /* if some progress, more pgms remaining in bundle, reset retries */ + retries = MAX_IFS_RETRIES; + activate.bundle_idx = status.bundle_idx; + activate.pgm_idx = status.pgm_idx; + } + } + + /* Update status for this core */ + ifsd->scan_details = status.data; + + if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL || + status.sbaf_status == SBAF_STATUS_TEST_FAIL) { + ifsd->status = SCAN_TEST_FAIL; + sbaf_message_fail(dev, cpu, status); + } else if (status.error_code || status.sbaf_status == SBAF_STATUS_INTR || + (activate.bundle_idx < stop_bundle)) { + ifsd->status = SCAN_NOT_TESTED; + sbaf_message_not_tested(dev, cpu, status.data); + } else { + ifsd->status = SCAN_TEST_PASS; + } +} + /* * Initiate per core test. It wakes up work queue threads on the target cpu and * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and @@ -394,6 +647,12 @@ int do_core_test(int cpu, struct device *dev) else ifs_array_test_gen1(cpu, dev); break; + case IFS_TYPE_SBAF: + if (!ifsd->loaded) + ret = -EPERM; + else + ifs_sbaf_test_core(cpu, dev); + break; default: ret = -EINVAL; } |