diff options
Diffstat (limited to 'arch/powerpc/kernel/rtas.c')
| -rw-r--r-- | arch/powerpc/kernel/rtas.c | 385 |
1 files changed, 224 insertions, 161 deletions
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c087eeee320f..8d81c1e7a8db 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -18,6 +18,8 @@ #include <linux/kernel.h> #include <linux/lockdep.h> #include <linux/memblock.h> +#include <linux/mutex.h> +#include <linux/nospec.h> #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/reboot.h> @@ -70,14 +72,35 @@ struct rtas_filter { * ppc64le, and we want to keep it that way. It does * not make sense for this to be set when @filter * is NULL. + * @lock: Pointer to an optional dedicated per-function mutex. This + * should be set for functions that require multiple calls in + * sequence to complete a single operation, and such sequences + * will disrupt each other if allowed to interleave. Users of + * this function are required to hold the associated lock for + * the duration of the call sequence. Add an explanatory + * comment to the function table entry if setting this member. */ struct rtas_function { s32 token; const bool banned_for_syscall_on_le:1; const char * const name; const struct rtas_filter *filter; + struct mutex *lock; }; +/* + * Per-function locks for sequence-based RTAS functions. + */ +static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); +static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); +DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); +DEFINE_MUTEX(rtas_ibm_get_vpd_lock); +DEFINE_MUTEX(rtas_ibm_get_indices_lock); +DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); +DEFINE_MUTEX(rtas_ibm_receive_hvpipe_msg_lock); +DEFINE_MUTEX(rtas_ibm_send_hvpipe_msg_lock); + static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__CHECK_EXCEPTION] = { .name = "check-exception", @@ -125,6 +148,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = -1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ as of v2.13 doesn't explicitly impose any + * restriction, but this typically requires multiple + * calls before success, and there's no reason to + * allow sequences to interleave. + */ + .lock = &rtas_ibm_activate_firmware_lock, }, [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { .name = "ibm,cbe-start-ptcal", @@ -196,6 +226,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 1, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS + * must not call ibm,get-dynamic-sensor-state with + * different inputs until a non-retry status has been + * returned. + */ + .lock = &rtas_ibm_get_dynamic_sensor_state_lock, }, [RTAS_FNIDX__IBM_GET_INDICES] = { .name = "ibm,get-indices", @@ -203,6 +240,12 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not + * interleave ibm,get-indices call sequences with + * different inputs. + */ + .lock = &rtas_ibm_get_indices_lock, }, [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { .name = "ibm,get-rio-topology", @@ -220,6 +263,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = -1, .buf_idx2 = 1, .size_idx2 = 2, }, + /* + * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences + * should not be allowed to interleave. + */ + .lock = &rtas_ibm_get_vpd_lock, }, [RTAS_FNIDX__IBM_GET_XIVE] = { .name = "ibm,get-xive", @@ -239,6 +287,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 2, .size_idx1 = 3, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow + * only one call sequence in progress at a time. + */ + .lock = &rtas_ibm_lpar_perftools_lock, }, [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { .name = "ibm,manage-flash-image", @@ -277,6 +330,14 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 0, .size_idx1 = 1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * This follows a sequence-based pattern similar to + * ibm,get-vpd et al. Since PAPR+ restricts + * interleaving call sequences for other functions of + * this style, assume the restriction applies here, + * even though it's not explicit in the spec. + */ + .lock = &rtas_ibm_physical_attestation_lock, }, [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { .name = "ibm,platform-dump", @@ -284,6 +345,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx1 = 4, .size_idx1 = 5, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent + * sequences of ibm,platform-dump are allowed if they + * are operating on different dump tags. So leave the + * lock pointer unset for now. This may need + * reconsideration if kernel-internal users appear. + */ }, [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { .name = "ibm,power-off-ups", @@ -307,11 +375,27 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { .name = "ibm,read-slot-reset-state2", }, + [RTAS_FNIDX__IBM_RECEIVE_HVPIPE_MSG] { + .name = "ibm,receive-hvpipe-msg", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.32.1 + */ + .lock = &rtas_ibm_receive_hvpipe_msg_lock, + }, [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { .name = "ibm,remove-pe-dma-window", }, - [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = { - .name = "ibm,reset-pe-dma-windows", + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { + /* + * Note: PAPR+ v2.13 7.3.31.4.1 spells this as + * "ibm,reset-pe-dma-windows" (plural), but RTAS + * implementations use the singular form in practice. + */ + .name = "ibm,reset-pe-dma-window", }, [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { .name = "ibm,scan-log-dump", @@ -320,12 +404,29 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { .buf_idx2 = -1, .size_idx2 = -1, }, }, + [RTAS_FNIDX__IBM_SEND_HVPIPE_MSG] { + .name = "ibm,send-hvpipe-msg", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.32.2 + */ + .lock = &rtas_ibm_send_hvpipe_msg_lock, + }, [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { .name = "ibm,set-dynamic-indicator", .filter = &(const struct rtas_filter) { .buf_idx1 = 2, .size_idx1 = -1, .buf_idx2 = -1, .size_idx2 = -1, }, + /* + * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call + * this function with different inputs until a + * non-retry status has been returned. + */ + .lock = &rtas_ibm_set_dynamic_indicator_lock, }, [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { .name = "ibm,set-eeh-option", @@ -454,6 +555,11 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { }, }; +#define for_each_rtas_function(funcp) \ + for (funcp = &rtas_function_table[0]; \ + funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \ + ++funcp) + /* * Nearly all RTAS calls need to be serialized. All uses of the * default rtas_args block must hold rtas_lock. @@ -525,10 +631,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray); static int __init rtas_token_to_function_xarray_init(void) { + const struct rtas_function *func; int err = 0; - for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { - const struct rtas_function *func = &rtas_function_table[i]; + for_each_rtas_function(func) { const s32 token = func->token; if (token == RTAS_UNKNOWN_SERVICE) @@ -544,6 +650,21 @@ static int __init rtas_token_to_function_xarray_init(void) } arch_initcall(rtas_token_to_function_xarray_init); +/* + * For use by sys_rtas(), where the token value is provided by user + * space and we don't want to warn on failed lookups. + */ +static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) +{ + return xa_load(&rtas_token_to_function_xarray, token); +} + +/* + * Reverse lookup for deriving the function descriptor from a + * known-good token value in contexts where the former is not already + * available. @token must be valid, e.g. derived from the result of a + * prior lookup against the function table. + */ static const struct rtas_function *rtas_token_to_function(s32 token) { const struct rtas_function *func; @@ -551,12 +672,22 @@ static const struct rtas_function *rtas_token_to_function(s32 token) if (WARN_ONCE(token < 0, "invalid token %d", token)) return NULL; - func = xa_load(&rtas_token_to_function_xarray, token); - - if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) - return NULL; + func = rtas_token_to_function_untrusted(token); + if (func) + return func; + /* + * Fall back to linear scan in case the reverse mapping hasn't + * been initialized yet. + */ + if (xa_empty(&rtas_token_to_function_xarray)) { + for_each_rtas_function(func) { + if (func->token == token) + return func; + } + } - return func; + WARN_ONCE(true, "unexpected failed lookup for token %d", token); + return NULL; } /* This is here deliberately so it's only used in this file */ @@ -570,28 +701,25 @@ static void __do_enter_rtas(struct rtas_args *args) static void __do_enter_rtas_trace(struct rtas_args *args) { - const char *name = NULL; + const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); - if (args == &rtas_args) - lockdep_assert_held(&rtas_lock); /* - * If the tracepoints that consume the function name aren't - * active, avoid the lookup. + * If there is a per-function lock, it must be held by the + * caller. */ - if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) { - const s32 token = be32_to_cpu(args->token); - const struct rtas_function *func = rtas_token_to_function(token); + if (func->lock) + lockdep_assert_held(func->lock); - name = func->name; - } + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); - trace_rtas_input(args, name); + trace_rtas_input(args, func->name); trace_rtas_ll_entry(args); __do_enter_rtas(args); trace_rtas_ll_exit(args); - trace_rtas_output(args, name); + trace_rtas_output(args, func->name); } static void do_enter_rtas(struct rtas_args *args) @@ -670,7 +798,7 @@ static void call_rtas_display_status_delay(char c) static int pending_newline = 0; /* did last write end with unprinted newline? */ static int width = 16; - if (c == '\n') { + if (c == '\n') { while (width-- > 0) call_rtas_display_status(' '); width = 16; @@ -680,7 +808,7 @@ static void call_rtas_display_status_delay(char c) if (pending_newline) { call_rtas_display_status('\r'); call_rtas_display_status('\n'); - } + } pending_newline = 0; if (width--) { call_rtas_display_status(c); @@ -694,66 +822,6 @@ void __init udbg_init_rtas_panel(void) udbg_putc = call_rtas_display_status_delay; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - -/* If you think you're dying before early_init_dt_scan_rtas() does its - * work, you can hard code the token values for your firmware here and - * hardcode rtas.base/entry etc. - */ -static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; -static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; - -static void udbg_rtascon_putc(char c) -{ - int tries; - - if (!rtas.base) - return; - - /* Add CRs before LFs */ - if (c == '\n') - udbg_rtascon_putc('\r'); - - /* if there is more than one character to be displayed, wait a bit */ - for (tries = 0; tries < 16; tries++) { - if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) - break; - udelay(1000); - } -} - -static int udbg_rtascon_getc_poll(void) -{ - int c; - - if (!rtas.base) - return -1; - - if (rtas_call(rtas_getchar_token, 0, 2, &c)) - return -1; - - return c; -} - -static int udbg_rtascon_getc(void) -{ - int c; - - while ((c = udbg_rtascon_getc_poll()) == -1) - ; - - return c; -} - - -void __init udbg_init_rtas_console(void) -{ - udbg_putc = udbg_rtascon_putc; - udbg_getc = udbg_rtascon_getc; - udbg_getc_poll = udbg_rtascon_getc_poll; -} -#endif /* CONFIG_UDBG_RTAS_CONSOLE */ - void rtas_progress(char *s, unsigned short hex) { struct device_node *root; @@ -820,7 +888,7 @@ void rtas_progress(char *s, unsigned short hex) else rtas_call(display_character, 1, 1, NULL, '\r'); } - + if (row_width) width = row_width[current_line]; else @@ -840,9 +908,9 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); return; } - + /* RTAS wants CR-LF, not just LF */ - + if (*os == '\n') { rtas_call(display_character, 1, 1, NULL, '\r'); rtas_call(display_character, 1, 1, NULL, '\n'); @@ -852,7 +920,7 @@ void rtas_progress(char *s, unsigned short hex) */ rtas_call(display_character, 1, 1, NULL, *os); } - + if (row_width) width = row_width[current_line]; else @@ -861,15 +929,15 @@ void rtas_progress(char *s, unsigned short hex) width--; rtas_call(display_character, 1, 1, NULL, *os); } - + os++; - + /* if we overwrite the screen length */ if (width <= 0) while ((*os != 0) && (*os != '\n') && (*os != '\r')) os++; } - + spin_unlock(&progress_lock); } EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ @@ -900,11 +968,6 @@ int rtas_token(const char *service) } EXPORT_SYMBOL_GPL(rtas_token); -int rtas_service_present(const char *service) -{ - return rtas_token(service) != RTAS_UNKNOWN_SERVICE; -} - #ifdef CONFIG_RTAS_ERROR_LOGGING static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; @@ -1291,21 +1354,14 @@ bool __ref rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. */ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; @@ -1330,33 +1386,34 @@ bool __ref rtas_busy_delay(int status) } EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_error_rc(int rtas_rc) { int rc; switch (rtas_rc) { - case -1: /* Hardware Error */ - rc = -EIO; - break; - case -3: /* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - pr_err("%s: unexpected error %d\n", __func__, rtas_rc); - rc = -ERANGE; - break; + case RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL_GPL(rtas_error_rc); int rtas_get_power_level(int powerdomain, int *level) { @@ -1587,6 +1644,7 @@ static bool ibm_extended_os_term; void rtas_os_term(char *str) { s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); + static struct rtas_args args; int status; /* @@ -1607,7 +1665,8 @@ void rtas_os_term(char *str) * schedules. */ do { - status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf)); + rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf)); + status = be32_to_cpu(args.rets[0]); } while (rtas_busy_delay_time(status)); if (status != 0) @@ -1635,10 +1694,14 @@ void rtas_activate_firmware(void) return; } + mutex_lock(&rtas_ibm_activate_firmware_lock); + do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); + mutex_unlock(&rtas_ibm_activate_firmware_lock); + if (fwrc) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } @@ -1710,24 +1773,18 @@ static bool in_rmo_buf(u32 base, u32 end) end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } -static bool block_rtas_call(int token, int nargs, +static bool block_rtas_call(const struct rtas_function *func, int nargs, struct rtas_args *args) { - const struct rtas_function *func; const struct rtas_filter *f; - const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP); - const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); + const bool is_platform_dump = + func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; + const bool is_config_conn = + func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; u32 base, size, end; /* - * If this token doesn't correspond to a function the kernel - * understands, you're not allowed to call it. - */ - func = rtas_token_to_function(token); - if (!func) - goto err; - /* - * And only functions with filters attached are allowed. + * Only functions with filters attached are allowed. */ f = func->filter; if (!f) @@ -1784,14 +1841,15 @@ static bool block_rtas_call(int token, int nargs, return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); - pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", - token, nargs, current->comm); + pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n", + func->name, nargs, current->comm); return true; } /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + const struct rtas_function *func; struct pin_cookie cookie; struct rtas_args args; unsigned long flags; @@ -1816,18 +1874,26 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; + nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args)); + nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs); + /* Copy in args. */ if (copy_from_user(args.args, uargs->args, nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (token == RTAS_UNKNOWN_SERVICE) + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function_untrusted(token); + if (!func) return -EINVAL; args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); - if (block_rtas_call(token, nargs, &args)) + if (block_rtas_call(func, nargs, &args)) return -EINVAL; if (token_is_restricted_errinjct(token)) { @@ -1860,6 +1926,15 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); + /* + * If this function has a mutex assigned to it, we must + * acquire it to avoid interleaving with any kernel-based uses + * of the same function. Kernel-based sequences acquire the + * appropriate mutex explicitly. + */ + if (func->lock) + mutex_lock(func->lock); + raw_spin_lock_irqsave(&rtas_lock, flags); cookie = lockdep_pin_lock(&rtas_lock); @@ -1875,6 +1950,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) lockdep_unpin_lock(&rtas_lock, cookie); raw_spin_unlock_irqrestore(&rtas_lock, flags); + if (func->lock) + mutex_unlock(func->lock); + if (buff_copy) { if (errbuf) log_error(errbuf, ERR_TYPE_RTAS_LOG, 0); @@ -2021,21 +2099,6 @@ int __init early_init_dt_scan_rtas(unsigned long node, rtas.size = *sizep; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - basep = of_get_flat_dt_prop(node, "put-term-char", NULL); - if (basep) - rtas_putchar_token = *basep; - - basep = of_get_flat_dt_prop(node, "get-term-char", NULL); - if (basep) - rtas_getchar_token = *basep; - - if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && - rtas_getchar_token != RTAS_UNKNOWN_SERVICE) - udbg_init_rtas_console(); - -#endif - /* break now */ return 1; } |
