diff options
Diffstat (limited to 'kernel/module')
-rw-r--r-- | kernel/module/Kconfig | 144 | ||||
-rw-r--r-- | kernel/module/Makefile | 2 | ||||
-rw-r--r-- | kernel/module/debug_kmemleak.c | 19 | ||||
-rw-r--r-- | kernel/module/dups.c | 1 | ||||
-rw-r--r-- | kernel/module/internal.h | 46 | ||||
-rw-r--r-- | kernel/module/kallsyms.c | 98 | ||||
-rw-r--r-- | kernel/module/kmod.c | 1 | ||||
-rw-r--r-- | kernel/module/main.c | 1085 | ||||
-rw-r--r-- | kernel/module/strict_rwx.c | 19 | ||||
-rw-r--r-- | kernel/module/sysfs.c | 180 | ||||
-rw-r--r-- | kernel/module/tracking.c | 2 | ||||
-rw-r--r-- | kernel/module/tree_lookup.c | 8 | ||||
-rw-r--r-- | kernel/module/version.c | 61 |
13 files changed, 1093 insertions, 573 deletions
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index f3e0329337f6..39278737bb68 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -2,6 +2,7 @@ menuconfig MODULES bool "Enable loadable module support" modules + select EXECMEM help Kernel modules are small pieces of compiled code which can be inserted in the running kernel, rather than being @@ -159,6 +160,7 @@ config MODULE_UNLOAD_TAINT_TRACKING config MODVERSIONS bool "Module versioning support" + depends on !COMPILE_TEST help Usually, you have to use modules compiled with your kernel. Saying Y here makes it sometimes possible to use modules @@ -167,6 +169,41 @@ config MODVERSIONS make them incompatible with the kernel you are running. If unsure, say N. +choice + prompt "Module versioning implementation" + depends on MODVERSIONS + help + Select the tool used to calculate symbol versions for modules. + + If unsure, select GENKSYMS. + +config GENKSYMS + bool "genksyms (from source code)" + help + Calculate symbol versions from pre-processed source code using + genksyms. + + If unsure, say Y. + +config GENDWARFKSYMS + bool "gendwarfksyms (from debugging information)" + depends on DEBUG_INFO + # Requires full debugging information, split DWARF not supported. + depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT + # Requires ELF object files. + depends on !LTO + # To avoid conflicts with the discarded __gendwarfksyms_ptr symbols on + # X86, requires pahole before commit 47dcb534e253 ("btf_encoder: Stop + # indexing symbols for VARs") or after commit 9810758003ce ("btf_encoder: + # Verify 0 address DWARF variables are in ELF section"). + depends on !X86 || !DEBUG_INFO_BTF || PAHOLE_VERSION < 128 || PAHOLE_VERSION > 129 + help + Calculate symbol versions from DWARF debugging information using + gendwarfksyms. Requires DEBUG_INFO to be enabled. + + If unsure, say N. 
+endchoice + config ASM_MODVERSIONS bool default HAVE_ASM_MODVERSIONS && MODVERSIONS @@ -175,6 +212,31 @@ config ASM_MODVERSIONS assembly. This can be enabled only when the target architecture supports it. +config EXTENDED_MODVERSIONS + bool "Extended Module Versioning Support" + depends on MODVERSIONS + help + This enables extended MODVERSIONs support, allowing long symbol + names to be versioned. + + The most likely reason you would enable this is to enable Rust + support. If unsure, say N. + +config BASIC_MODVERSIONS + bool "Basic Module Versioning Support" + depends on MODVERSIONS + default y + help + This enables basic MODVERSIONS support, allowing older tools or + kernels to potentially load modules. + + Disabling this may cause older `modprobe` or `kmod` to be unable + to read MODVERSIONS information from built modules. With this + disabled, older kernels may treat this module as unversioned. + + This is enabled by default when MODVERSIONS are enabled. + If unsure, say Y. + config MODULE_SRCVERSION_ALL bool "Source checksum for all modules" help @@ -227,8 +289,9 @@ comment "Do not forget to sign required modules with scripts/sign-file" depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL choice - prompt "Which hash algorithm should modules be signed with?" + prompt "Hash algorithm to sign modules" depends on MODULE_SIG || IMA_APPRAISE_MODSIG + default MODULE_SIG_SHA512 help This determines which sort of hashing algorithm will be used during signature generation. This algorithm _must_ be built into the kernel @@ -237,31 +300,31 @@ choice the signature on that module. 
config MODULE_SIG_SHA1 - bool "Sign modules with SHA-1" + bool "SHA-1" select CRYPTO_SHA1 config MODULE_SIG_SHA256 - bool "Sign modules with SHA-256" + bool "SHA-256" select CRYPTO_SHA256 config MODULE_SIG_SHA384 - bool "Sign modules with SHA-384" + bool "SHA-384" select CRYPTO_SHA512 config MODULE_SIG_SHA512 - bool "Sign modules with SHA-512" + bool "SHA-512" select CRYPTO_SHA512 config MODULE_SIG_SHA3_256 - bool "Sign modules with SHA3-256" + bool "SHA3-256" select CRYPTO_SHA3 config MODULE_SIG_SHA3_384 - bool "Sign modules with SHA3-384" + bool "SHA3-384" select CRYPTO_SHA3 config MODULE_SIG_SHA3_512 - bool "Sign modules with SHA3-512" + bool "SHA3-512" select CRYPTO_SHA3 endchoice @@ -277,64 +340,65 @@ config MODULE_SIG_HASH default "sha3-384" if MODULE_SIG_SHA3_384 default "sha3-512" if MODULE_SIG_SHA3_512 -choice - prompt "Module compression mode" +config MODULE_COMPRESS + bool "Module compression" help - This option allows you to choose the algorithm which will be used to - compress modules when 'make modules_install' is run. (or, you can - choose to not compress modules at all.) - - External modules will also be compressed in the same way during the - installation. - - For modules inside an initrd or initramfs, it's more efficient to - compress the whole initrd or initramfs instead. - + Enable module compression to reduce on-disk size of module binaries. This is fully compatible with signed modules. - Please note that the tool used to load modules needs to support the - corresponding algorithm. module-init-tools MAY support gzip, and kmod - MAY support gzip, xz and zstd. + The tool used to work with modules needs to support the selected + compression type. kmod MAY support gzip, xz and zstd. Other tools + might have a limited selection of the supported types. - Your build system needs to provide the appropriate compression tool - to compress the modules. 
+ Note that for modules inside an initrd or initramfs, it's more + efficient to compress the whole ramdisk instead. - If in doubt, select 'None'. + If unsure, say N. -config MODULE_COMPRESS_NONE - bool "None" +choice + prompt "Module compression type" + depends on MODULE_COMPRESS help - Do not compress modules. The installed modules are suffixed - with .ko. + Choose the supported algorithm for module compression. config MODULE_COMPRESS_GZIP bool "GZIP" help - Compress modules with GZIP. The installed modules are suffixed - with .ko.gz. + Support modules compressed with GZIP. The installed modules are + suffixed with .ko.gz. config MODULE_COMPRESS_XZ bool "XZ" help - Compress modules with XZ. The installed modules are suffixed - with .ko.xz. + Support modules compressed with XZ. The installed modules are + suffixed with .ko.xz. config MODULE_COMPRESS_ZSTD bool "ZSTD" help - Compress modules with ZSTD. The installed modules are suffixed - with .ko.zst. + Support modules compressed with ZSTD. The installed modules are + suffixed with .ko.zst. endchoice +config MODULE_COMPRESS_ALL + bool "Automatically compress all modules" + default y + depends on MODULE_COMPRESS + help + Compress all modules during 'make modules_install'. + + Your build system needs to provide the appropriate compression tool + for the selected compression type. External modules will also be + compressed in the same way during the installation. + config MODULE_DECOMPRESS bool "Support in-kernel module decompression" - depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ || MODULE_COMPRESS_ZSTD + depends on MODULE_COMPRESS select ZLIB_INFLATE if MODULE_COMPRESS_GZIP select XZ_DEC if MODULE_COMPRESS_XZ select ZSTD_DECOMPRESS if MODULE_COMPRESS_ZSTD help - Support for decompressing kernel modules by the kernel itself instead of relying on userspace to perform this task. Useful when load pinning security policy is enabled. 
@@ -346,7 +410,7 @@ config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS help Symbols exported with EXPORT_SYMBOL_NS*() are considered exported in a namespace. A module that makes use of a symbol exported with such a - namespace is required to import the namespace via MODULE_IMPORT_NS(). + namespace is required to import the namespace via MODULE_IMPORT_NS(""). There is no technical reason to enforce correct namespace imports, but it creates consistency between symbols defining namespaces and users importing namespaces they make use of. This option relaxes this @@ -392,7 +456,7 @@ config UNUSED_KSYMS_WHITELIST exported at all times, even in absence of in-tree users. The value to set here is the path to a text file containing the list of symbols, one per line. The path can be absolute, or relative to the kernel - source tree. + source or obj tree. config MODULES_TREE_LOOKUP def_bool y diff --git a/kernel/module/Makefile b/kernel/module/Makefile index a10b2b9a6fdf..50ffcc413b54 100644 --- a/kernel/module/Makefile +++ b/kernel/module/Makefile @@ -5,7 +5,7 @@ # These are called from save_stack_trace() on slub debug path, # and produce insane amounts of uninteresting coverage. 
-KCOV_INSTRUMENT_module.o := n +KCOV_INSTRUMENT_main.o := n obj-y += main.o obj-y += strict_rwx.o diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c index 12a569d361e8..df873dad049d 100644 --- a/kernel/module/debug_kmemleak.c +++ b/kernel/module/debug_kmemleak.c @@ -12,19 +12,10 @@ void kmemleak_load_module(const struct module *mod, const struct load_info *info) { - unsigned int i; - - /* only scan the sections containing data */ - kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); - - for (i = 1; i < info->hdr->e_shnum; i++) { - /* Scan all writable sections that's not executable */ - if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) || - !(info->sechdrs[i].sh_flags & SHF_WRITE) || - (info->sechdrs[i].sh_flags & SHF_EXECINSTR)) - continue; - - kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, - info->sechdrs[i].sh_size, GFP_KERNEL); + /* only scan writable, non-executable sections */ + for_each_mod_mem_type(type) { + if (type != MOD_DATA && type != MOD_INIT_DATA && + !mod->mem[type].is_rox) + kmemleak_no_scan(mod->mem[type].base); } } diff --git a/kernel/module/dups.c b/kernel/module/dups.c index 9a92f2f8c9d3..bd2149fbe117 100644 --- a/kernel/module/dups.c +++ b/kernel/module/dups.c @@ -18,7 +18,6 @@ #include <linux/completion.h> #include <linux/cred.h> #include <linux/file.h> -#include <linux/fdtable.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> diff --git a/kernel/module/internal.h b/kernel/module/internal.h index 2ebece8a789f..626cf8668a7e 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -47,16 +47,16 @@ struct kernel_symbol { extern struct mutex module_mutex; extern struct list_head modules; -extern struct module_attribute *modinfo_attrs[]; -extern size_t modinfo_attrs_count; +extern const struct module_attribute *const modinfo_attrs[]; +extern const size_t modinfo_attrs_count; /* Provided by the linker */ extern const struct kernel_symbol __start___ksymtab[]; extern 
const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const s32 __start___kcrctab[]; -extern const s32 __start___kcrctab_gpl[]; +extern const u32 __start___kcrctab[]; +extern const u32 __start___kcrctab_gpl[]; struct load_info { const char *name; @@ -80,7 +80,14 @@ struct load_info { unsigned int used_pages; #endif struct { - unsigned int sym, str, mod, vers, info, pcpu; + unsigned int sym; + unsigned int str; + unsigned int mod; + unsigned int vers; + unsigned int info; + unsigned int pcpu; + unsigned int vers_ext_crc; + unsigned int vers_ext_name; } index; }; @@ -97,7 +104,7 @@ struct find_symbol_arg { /* Output */ struct module *owner; - const s32 *crc; + const u32 *crc; const struct kernel_symbol *sym; enum mod_license license; }; @@ -117,17 +124,6 @@ char *module_next_tag_pair(char *string, unsigned long *secsize); #define for_each_modinfo_entry(entry, info, name) \ for (entry = get_modinfo(info, name); entry; entry = get_next_modinfo(info, name, entry)) -static inline void module_assert_mutex_or_preempt(void) -{ -#ifdef CONFIG_LOCKDEP - if (unlikely(!debug_locks)) - return; - - WARN_ON_ONCE(!rcu_read_lock_sched_held() && - !lockdep_is_held(&module_mutex)); -#endif -} - static inline unsigned long kernel_symbol_value(const struct kernel_symbol *sym) { #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS @@ -322,7 +318,8 @@ static inline struct module *mod_find(unsigned long addr, struct mod_tree_root * } #endif /* CONFIG_MODULES_TREE_LOOKUP */ -int module_enable_rodata_ro(const struct module *mod, bool after_init); +int module_enable_rodata_ro(const struct module *mod); +int module_enable_rodata_ro_after_init(const struct module *mod); int module_enable_data_nx(const struct module *mod); int module_enable_text_rox(const struct module *mod); int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -379,16 +376,25 @@ static inline void 
init_param_lock(struct module *mod) { } #ifdef CONFIG_MODVERSIONS int check_version(const struct load_info *info, - const char *symname, struct module *mod, const s32 *crc); + const char *symname, struct module *mod, const u32 *crc); void module_layout(struct module *mod, struct modversion_info *ver, struct kernel_param *kp, struct kernel_symbol *ks, struct tracepoint * const *tp); int check_modstruct_version(const struct load_info *info, struct module *mod); int same_magic(const char *amagic, const char *bmagic, bool has_crcs); +struct modversion_info_ext { + size_t remaining; + const u32 *crc; + const char *name; +}; +void modversion_ext_start(const struct load_info *info, struct modversion_info_ext *ver); +void modversion_ext_advance(struct modversion_info_ext *ver); +#define for_each_modversion_info_ext(ver, info) \ + for (modversion_ext_start(info, &ver); ver.remaining > 0; modversion_ext_advance(&ver)) #else /* !CONFIG_MODVERSIONS */ static inline int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { return 1; } diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index ef73ae7c8909..00a60796327c 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -177,19 +177,15 @@ void add_kallsyms(struct module *mod, const struct load_info *info) unsigned long strtab_size; void *data_base = mod->mem[MOD_DATA].base; void *init_data_base = mod->mem[MOD_INIT_DATA].base; + struct mod_kallsyms *kallsyms; - /* Set up to point into init section. 
*/ - mod->kallsyms = (void __rcu *)init_data_base + - info->mod_kallsyms_init_off; + kallsyms = init_data_base + info->mod_kallsyms_init_off; - rcu_read_lock(); - /* The following is safe since this pointer cannot change */ - rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; - rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + kallsyms->symtab = (void *)symsec->sh_addr; + kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - rcu_dereference(mod->kallsyms)->strtab = - (void *)info->sechdrs[info->index.str].sh_addr; - rcu_dereference(mod->kallsyms)->typetab = init_data_base + info->init_typeoffs; + kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + kallsyms->typetab = init_data_base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init @@ -199,20 +195,19 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.strtab = s = data_base + info->stroffs; mod->core_kallsyms.typetab = data_base + info->core_typeoffs; strtab_size = info->core_typeoffs - info->stroffs; - src = rcu_dereference(mod->kallsyms)->symtab; - for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { - rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); + src = kallsyms->symtab; + for (ndst = i = 0; i < kallsyms->num_symtab; i++) { + kallsyms->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { ssize_t ret; mod->core_kallsyms.typetab[ndst] = - rcu_dereference(mod->kallsyms)->typetab[i]; + kallsyms->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; - ret = strscpy(s, - &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], + ret = strscpy(s, &kallsyms->strtab[src[i].st_name], strtab_size); if (ret < 0) break; @@ -220,7 +215,9 @@ 
void add_kallsyms(struct module *mod, const struct load_info *info) strtab_size -= ret + 1; } } - rcu_read_unlock(); + + /* Set up to point into init section. */ + rcu_assign_pointer(mod->kallsyms, kallsyms); mod->core_kallsyms.num_symtab = ndst; } @@ -260,7 +257,7 @@ static const char *find_kallsyms_symbol(struct module *mod, { unsigned int i, best = 0; unsigned long nextval, bestval; - struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); + struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms); struct module_memory *mod_mem; /* At worse, next value is at end of module */ @@ -319,19 +316,20 @@ void * __weak dereference_module_function_descriptor(struct module *mod, /* * For kallsyms to ask for address resolution. NULL means not found. Careful - * not to lock to avoid deadlock on oopses, simply disable preemption. + * not to lock to avoid deadlock on oopses, RCU is enough. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *size, - unsigned long *offset, - char **modname, - const unsigned char **modbuildid, - char *namebuf) +int module_address_lookup(unsigned long addr, + unsigned long *size, + unsigned long *offset, + char **modname, + const unsigned char **modbuildid, + char *namebuf) { - const char *ret = NULL; + const char *sym; + int ret = 0; struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address(addr); if (mod) { if (modname) @@ -344,15 +342,11 @@ const char *module_address_lookup(unsigned long addr, #endif } - ret = find_kallsyms_symbol(mod, addr, size, offset); - } - /* Make a copy in here where it's safe */ - if (ret) { - strncpy(namebuf, ret, KSYM_NAME_LEN - 1); - ret = namebuf; - } - preempt_enable(); + sym = find_kallsyms_symbol(mod, addr, size, offset); + if (sym) + ret = strscpy(namebuf, sym, KSYM_NAME_LEN); + } return ret; } @@ -360,7 +354,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; - preempt_disable(); + guard(rcu)(); 
list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -372,12 +366,10 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) goto out; strscpy(symname, sym, KSYM_NAME_LEN); - preempt_enable(); return 0; } } out: - preempt_enable(); return -ERANGE; } @@ -386,13 +378,13 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, { struct module *mod; - preempt_disable(); + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { struct mod_kallsyms *kallsyms; if (mod->state == MODULE_STATE_UNFORMED) continue; - kallsyms = rcu_dereference_sched(mod->kallsyms); + kallsyms = rcu_dereference(mod->kallsyms); if (symnum < kallsyms->num_symtab) { const Elf_Sym *sym = &kallsyms->symtab[symnum]; @@ -401,12 +393,10 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, strscpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN); strscpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); - preempt_enable(); return 0; } symnum -= kallsyms->num_symtab; } - preempt_enable(); return -ERANGE; } @@ -414,7 +404,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static unsigned long __find_kallsyms_symbol_value(struct module *mod, const char *name) { unsigned int i; - struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); + struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; @@ -454,23 +444,15 @@ static unsigned long __module_kallsyms_lookup_name(const char *name) /* Look for this name: can be of form module:name. */ unsigned long module_kallsyms_lookup_name(const char *name) { - unsigned long ret; - /* Don't lock: we're in enough trouble already. 
*/ - preempt_disable(); - ret = __module_kallsyms_lookup_name(name); - preempt_enable(); - return ret; + guard(rcu)(); + return __module_kallsyms_lookup_name(name); } unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name) { - unsigned long ret; - - preempt_disable(); - ret = __find_kallsyms_symbol_value(mod, name); - preempt_enable(); - return ret; + guard(rcu)(); + return __find_kallsyms_symbol_value(mod, name); } int module_kallsyms_on_each_symbol(const char *modname, @@ -491,10 +473,8 @@ int module_kallsyms_on_each_symbol(const char *modname, if (modname && strcmp(modname, mod->name)) continue; - /* Use rcu_dereference_sched() to remain compliant with the sparse tool */ - preempt_disable(); - kallsyms = rcu_dereference_sched(mod->kallsyms); - preempt_enable(); + kallsyms = rcu_dereference_check(mod->kallsyms, + lockdep_is_held(&module_mutex)); for (i = 0; i < kallsyms->num_symtab; i++) { const Elf_Sym *sym = &kallsyms->symtab[i]; diff --git a/kernel/module/kmod.c b/kernel/module/kmod.c index 0800d9891692..25f253812512 100644 --- a/kernel/module/kmod.c +++ b/kernel/module/kmod.c @@ -15,7 +15,6 @@ #include <linux/completion.h> #include <linux/cred.h> #include <linux/file.h> -#include <linux/fdtable.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..5c6ab20240a6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -56,7 +56,9 @@ #include <linux/dynamic_debug.h> #include <linux/audit.h> #include <linux/cfi.h> +#include <linux/codetag.h> #include <linux/debugfs.h> +#include <linux/execmem.h> #include <uapi/linux/module.h> #include "internal.h" @@ -65,7 +67,7 @@ /* * Mutex protects: - * 1) List of modules (also safely readable with preempt_disable), + * 1) List of modules (also safely readable within RCU read section), * 2) module_use links, * 3) mod_tree.addr_min/mod_tree.addr_max. 
* (delete and add uses RCU list operations). @@ -84,7 +86,7 @@ struct mod_tree_root mod_tree __cacheline_aligned = { struct symsearch { const struct kernel_symbol *start, *stop; - const s32 *crcs; + const u32 *crcs; enum mod_license license; }; @@ -193,6 +195,38 @@ static unsigned int find_sec(const struct load_info *info, const char *name) return 0; } +/** + * find_any_unique_sec() - Find a unique section index by name + * @info: Load info for the module to scan + * @name: Name of the section we're looking for + * + * Locates a unique section by name. Ignores SHF_ALLOC. + * + * Return: Section index if found uniquely, zero if absent, negative count + * of total instances if multiple were found. + */ +static int find_any_unique_sec(const struct load_info *info, const char *name) +{ + unsigned int idx; + unsigned int count = 0; + int i; + + for (i = 1; i < info->hdr->e_shnum; i++) { + if (strcmp(info->secstrings + info->sechdrs[i].sh_name, + name) == 0) { + count++; + idx = i; + } + } + if (count == 1) { + return idx; + } else if (count == 0) { + return 0; + } else { + return -count; + } +} + /* Find a module section, or NULL. */ static void *section_addr(const struct load_info *info, const char *name) { @@ -297,7 +331,7 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms, /* * Find an exported symbol and return it, along with, (optional) crc and - * (optional) module which owns it. Needs preempt disabled or module_mutex. + * (optional) module which owns it. Needs RCU or module_mutex. 
*/ bool find_symbol(struct find_symbol_arg *fsa) { @@ -311,8 +345,6 @@ bool find_symbol(struct find_symbol_arg *fsa) struct module *mod; unsigned int i; - module_assert_mutex_or_preempt(); - for (i = 0; i < ARRAY_SIZE(arr); i++) if (find_exported_symbol_in_section(&arr[i], NULL, fsa)) return true; @@ -340,16 +372,14 @@ bool find_symbol(struct find_symbol_arg *fsa) } /* - * Search for module by name: must hold module_mutex (or preempt disabled - * for read-only access). + * Search for module by name: must hold module_mutex (or RCU for read-only + * access). */ struct module *find_module_all(const char *name, size_t len, bool even_unformed) { struct module *mod; - module_assert_mutex_or_preempt(); - list_for_each_entry_rcu(mod, &modules, list, lockdep_is_held(&module_mutex)) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) @@ -420,8 +450,7 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) struct module *mod; unsigned int cpu; - preempt_disable(); - + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -438,13 +467,10 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) per_cpu_ptr(mod->percpu, get_boot_cpu_id()); } - preempt_enable(); return true; } } } - - preempt_enable(); return false; } @@ -504,7 +530,7 @@ static void setup_modinfo_##field(struct module *mod, const char *s) \ { \ mod->field = kstrdup(s, GFP_KERNEL); \ } \ -static ssize_t show_modinfo_##field(struct module_attribute *mattr, \ +static ssize_t show_modinfo_##field(const struct module_attribute *mattr, \ struct module_kobject *mk, char *buffer) \ { \ return scnprintf(buffer, PAGE_SIZE, "%s\n", mk->mod->field); \ @@ -518,7 +544,7 @@ static void free_modinfo_##field(struct module *mod) \ kfree(mod->field); \ mod->field = NULL; \ } \ -static struct module_attribute modinfo_##field = { \ +static const struct module_attribute modinfo_##field = { \ .attr = { .name = 
__stringify(field), .mode = 0444 }, \ .show = show_modinfo_##field, \ .setup = setup_modinfo_##field, \ @@ -761,8 +787,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, async_synchronize_full(); /* Store the name and taints of the last unloaded module for diagnostic purposes */ - strscpy(last_unloaded_module.name, mod->name, sizeof(last_unloaded_module.name)); - strscpy(last_unloaded_module.taints, module_flags(mod, buf, false), sizeof(last_unloaded_module.taints)); + strscpy(last_unloaded_module.name, mod->name); + strscpy(last_unloaded_module.taints, module_flags(mod, buf, false)); free_module(mod); /* someone could wait for the module in add_unformed_module() */ @@ -780,10 +806,9 @@ void __symbol_put(const char *symbol) .gplok = true, }; - preempt_disable(); + guard(rcu)(); BUG_ON(!find_symbol(&fsa)); module_put(fsa.owner); - preempt_enable(); } EXPORT_SYMBOL(__symbol_put); @@ -798,23 +823,22 @@ void symbol_put_addr(void *addr) /* * Even though we hold a reference on the module; we still need to - * disable preemption in order to safely traverse the data structure. + * RCU read section in order to safely traverse the data structure. 
*/ - preempt_disable(); + guard(rcu)(); modaddr = __module_text_address(a); BUG_ON(!modaddr); module_put(modaddr); - preempt_enable(); } EXPORT_SYMBOL_GPL(symbol_put_addr); -static ssize_t show_refcnt(struct module_attribute *mattr, +static ssize_t show_refcnt(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { return sprintf(buffer, "%i\n", module_refcount(mk->mod)); } -static struct module_attribute modinfo_refcnt = +static const struct module_attribute modinfo_refcnt = __ATTR(refcnt, 0444, show_refcnt, NULL); void __module_get(struct module *module) @@ -883,7 +907,7 @@ size_t module_flags_taint(unsigned long taints, char *buf) return l; } -static ssize_t show_initstate(struct module_attribute *mattr, +static ssize_t show_initstate(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { const char *state = "unknown"; @@ -904,10 +928,10 @@ static ssize_t show_initstate(struct module_attribute *mattr, return sprintf(buffer, "%s\n", state); } -static struct module_attribute modinfo_initstate = +static const struct module_attribute modinfo_initstate = __ATTR(initstate, 0444, show_initstate, NULL); -static ssize_t store_uevent(struct module_attribute *mattr, +static ssize_t store_uevent(const struct module_attribute *mattr, struct module_kobject *mk, const char *buffer, size_t count) { @@ -917,10 +941,10 @@ static ssize_t store_uevent(struct module_attribute *mattr, return rc ? 
rc : count; } -struct module_attribute module_uevent = +const struct module_attribute module_uevent = __ATTR(uevent, 0200, NULL, store_uevent); -static ssize_t show_coresize(struct module_attribute *mattr, +static ssize_t show_coresize(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = mk->mod->mem[MOD_TEXT].size; @@ -932,11 +956,11 @@ static ssize_t show_coresize(struct module_attribute *mattr, return sprintf(buffer, "%u\n", size); } -static struct module_attribute modinfo_coresize = +static const struct module_attribute modinfo_coresize = __ATTR(coresize, 0444, show_coresize, NULL); #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC -static ssize_t show_datasize(struct module_attribute *mattr, +static ssize_t show_datasize(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = 0; @@ -946,11 +970,11 @@ static ssize_t show_datasize(struct module_attribute *mattr, return sprintf(buffer, "%u\n", size); } -static struct module_attribute modinfo_datasize = +static const struct module_attribute modinfo_datasize = __ATTR(datasize, 0444, show_datasize, NULL); #endif -static ssize_t show_initsize(struct module_attribute *mattr, +static ssize_t show_initsize(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { unsigned int size = 0; @@ -960,10 +984,10 @@ static ssize_t show_initsize(struct module_attribute *mattr, return sprintf(buffer, "%u\n", size); } -static struct module_attribute modinfo_initsize = +static const struct module_attribute modinfo_initsize = __ATTR(initsize, 0444, show_initsize, NULL); -static ssize_t show_taint(struct module_attribute *mattr, +static ssize_t show_taint(const struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { size_t l; @@ -973,10 +997,10 @@ static ssize_t show_taint(struct module_attribute *mattr, return l; } -static struct module_attribute modinfo_taint = +static const struct module_attribute 
modinfo_taint = __ATTR(taint, 0444, show_taint, NULL); -struct module_attribute *modinfo_attrs[] = { +const struct module_attribute *const modinfo_attrs[] = { &module_uevent, &modinfo_version, &modinfo_srcversion, @@ -993,7 +1017,7 @@ struct module_attribute *modinfo_attrs[] = { NULL, }; -size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs); +const size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs); static const char vermagic[] = VERMAGIC_STRING; @@ -1155,7 +1179,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod, getname: /* We must make copy under the lock if we failed to get ref. */ - strncpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN); + strscpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN); unlock: mutex_unlock(&module_mutex); return fsa.sym; @@ -1179,16 +1203,6 @@ resolve_symbol_wait(struct module *mod, return ksym; } -void __weak module_memfree(void *module_region) -{ - /* - * This memory may be RO, and freeing RO memory in an interrupt is not - * supported by vmalloc. 
- */ - WARN_ON(in_interrupt()); - vfree(module_region); -} - void __weak module_arch_cleanup(struct module *mod) { } @@ -1197,25 +1211,69 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static bool mod_mem_use_vmalloc(enum mod_mem_type type) +static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { - return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) && - mod_mem_type_is_core_data(type); + unsigned int size = PAGE_ALIGN(mod->mem[type].size); + enum execmem_type execmem_type; + void *ptr; + + mod->mem[type].size = size; + + if (mod_mem_type_is_data(type)) + execmem_type = EXECMEM_MODULE_DATA; + else + execmem_type = EXECMEM_MODULE_TEXT; + + ptr = execmem_alloc(execmem_type, size); + if (!ptr) + return -ENOMEM; + + if (execmem_is_rox(execmem_type)) { + int err = execmem_make_temp_rw(ptr, size); + + if (err) { + execmem_free(ptr); + return -ENOMEM; + } + + mod->mem[type].is_rox = true; + } + + /* + * The pointer to these blocks of memory are stored on the module + * structure and we keep that around so long as the module is + * around. We only free that memory when we unload the module. + * Just mark them as not being a leak then. The .init* ELF + * sections *do* get freed after boot so we *could* treat them + * slightly differently with kmemleak_ignore() and only grey + * them out as they work as typical memory allocations which + * *do* eventually get freed, but let's just keep things simple + * and avoid *any* false positives. 
+ */ + if (!mod->mem[type].is_rox) + kmemleak_not_leak(ptr); + + memset(ptr, 0, size); + mod->mem[type].base = ptr; + + return 0; } -static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) +static void module_memory_restore_rox(struct module *mod) { - if (mod_mem_use_vmalloc(type)) - return vzalloc(size); - return module_alloc(size); + for_class_mod_mem_type(type, text) { + struct module_memory *mem = &mod->mem[type]; + + if (mem->is_rox) + execmem_restore_rox(mem->base, mem->size); + } } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(struct module *mod, enum mod_mem_type type) { - if (mod_mem_use_vmalloc(type)) - vfree(ptr); - else - module_memfree(ptr); + struct module_memory *mem = &mod->mem[type]; + + execmem_free(mem->base); } static void free_mod_mem(struct module *mod) @@ -1229,12 +1287,12 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. */ @@ -1242,6 +1300,8 @@ static void free_module(struct module *mod) { trace_module_free(mod); + codetag_unload_module(mod); + mod_sysfs_teardown(mod); /* @@ -1271,7 +1331,7 @@ static void free_module(struct module *mod) mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); - /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ + /* Wait for RCU synchronizing before releasing mod->list and buglist. 
*/ synchronize_rcu(); if (try_add_tainted_module(mod)) pr_err("%s: adding tainted module to the unloaded tainted modules list failed.\n", @@ -1294,21 +1354,18 @@ void *__symbol_get(const char *symbol) .warn = true, }; - preempt_disable(); - if (!find_symbol(&fsa)) - goto fail; - if (fsa.license != GPL_ONLY) { - pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n", - symbol); - goto fail; + scoped_guard(rcu) { + if (!find_symbol(&fsa)) + return NULL; + if (fsa.license != GPL_ONLY) { + pr_warn("failing symbol_get of non-GPLONLY symbol %s.\n", + symbol); + return NULL; + } + if (strong_try_module_get(fsa.owner)) + return NULL; } - if (strong_try_module_get(fsa.owner)) - goto fail; - preempt_enable(); return (void *)kernel_symbol_value(fsa.sym); -fail: - preempt_enable(); - return NULL; } EXPORT_SYMBOL_GPL(__symbol_get); @@ -1548,6 +1605,20 @@ static void __layout_sections(struct module *mod, struct load_info *info, bool i if (WARN_ON_ONCE(type == MOD_INVALID)) continue; + /* + * Do not allocate codetag memory as we load it into + * preallocated contiguous memory. + */ + if (codetag_needs_module_section(mod, sname, s->sh_size)) { + /* + * s->sh_entsize won't be used but populate the + * type field to avoid confusion. 
+ */ + s->sh_entsize = ((unsigned long)(type) & SH_ENTSIZE_TYPE_MASK) + << SH_ENTSIZE_TYPE_SHIFT; + continue; + } + s->sh_entsize = module_get_offset_and_type(mod, type, s, i); pr_debug("\t%s\n", sname); } @@ -1590,7 +1661,7 @@ static void module_license_taint_check(struct module *mod, const char *license) static void setup_modinfo(struct module *mod, struct load_info *info) { - struct module_attribute *attr; + const struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { @@ -1601,7 +1672,7 @@ static void setup_modinfo(struct module *mod, struct load_info *info) static void free_modinfo(struct module *mod) { - struct module_attribute *attr; + const struct module_attribute *attr; int i; for (i = 0; (attr = modinfo_attrs[i]); i++) { @@ -1610,13 +1681,6 @@ static void free_modinfo(struct module *mod) } } -void * __weak module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - bool __weak module_init_section(const char *name) { return strstarts(name, ".init"); @@ -1627,7 +1691,7 @@ bool __weak module_exit_section(const char *name) return strstarts(name, ".exit"); } -static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) +static int validate_section_offset(const struct load_info *info, Elf_Shdr *shdr) { #if defined(CONFIG_64BIT) unsigned long long secend; @@ -1646,62 +1710,80 @@ static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) return 0; } -/* - * Check userspace passed ELF module against our expectations, and cache - * useful variables for further processing as we go. +/** + * elf_validity_ehdr() - Checks an ELF header for module validity + * @info: Load info containing the ELF header to check * - * This does basic validity checks against section offsets and sizes, the - * section name string table, and the indices used for it (sh_name). 
+ * Checks whether an ELF header could belong to a valid module. Checks: * - * As a last step, since we're already checking the ELF sections we cache - * useful variables which will be used later for our convenience: + * * ELF header is within the data the user provided + * * ELF magic is present + * * It is relocatable (not final linked, not core file, etc.) + * * The header's machine type matches what the architecture expects. + * * Optional arch-specific hook for other properties + * - module_elf_check_arch() is currently only used by PPC to check + * ELF ABI version, but may be used by others in the future. * - * o pointers to section headers - * o cache the modinfo symbol section - * o cache the string symbol section - * o cache the module section - * - * As a last step we set info->mod to the temporary copy of the module in - * info->hdr. The final one will be allocated in move_module(). Any - * modifications we make to our copy of the module will be carried over - * to the final minted module. + * Return: %0 if valid, %-ENOEXEC on failure. 
*/ -static int elf_validity_cache_copy(struct load_info *info, int flags) +static int elf_validity_ehdr(const struct load_info *info) { - unsigned int i; - Elf_Shdr *shdr, *strhdr; - int err; - unsigned int num_mod_secs = 0, mod_idx; - unsigned int num_info_secs = 0, info_idx; - unsigned int num_sym_secs = 0, sym_idx; - if (info->len < sizeof(*(info->hdr))) { pr_err("Invalid ELF header len %lu\n", info->len); - goto no_exec; + return -ENOEXEC; } - if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0) { pr_err("Invalid ELF header magic: != %s\n", ELFMAG); - goto no_exec; + return -ENOEXEC; } if (info->hdr->e_type != ET_REL) { pr_err("Invalid ELF header type: %u != %u\n", info->hdr->e_type, ET_REL); - goto no_exec; + return -ENOEXEC; } if (!elf_check_arch(info->hdr)) { pr_err("Invalid architecture in ELF header: %u\n", info->hdr->e_machine); - goto no_exec; + return -ENOEXEC; } if (!module_elf_check_arch(info->hdr)) { pr_err("Invalid module architecture in ELF header: %u\n", info->hdr->e_machine); - goto no_exec; + return -ENOEXEC; } + return 0; +} + +/** + * elf_validity_cache_sechdrs() - Cache section headers if valid + * @info: Load info to compute section headers from + * + * Checks: + * + * * ELF header is valid (see elf_validity_ehdr()) + * * Section headers are the size we expect + * * Section array fits in the user provided data + * * Section index 0 is NULL + * * Section contents are inbounds + * + * Then updates @info with a &load_info->sechdrs pointer if valid. + * + * Return: %0 if valid, negative error code if validation failed. 
+ */ +static int elf_validity_cache_sechdrs(struct load_info *info) +{ + Elf_Shdr *sechdrs; + Elf_Shdr *shdr; + int i; + int err; + + err = elf_validity_ehdr(info); + if (err < 0) + return err; + if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) { pr_err("Invalid ELF section header size\n"); - goto no_exec; + return -ENOEXEC; } /* @@ -1713,10 +1795,66 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) || (info->hdr->e_shnum * sizeof(Elf_Shdr) > info->len - info->hdr->e_shoff)) { pr_err("Invalid ELF section header overflow\n"); - goto no_exec; + return -ENOEXEC; + } + + sechdrs = (void *)info->hdr + info->hdr->e_shoff; + + /* + * The code assumes that section 0 has a length of zero and + * an addr of zero, so check for it. + */ + if (sechdrs[0].sh_type != SHT_NULL + || sechdrs[0].sh_size != 0 + || sechdrs[0].sh_addr != 0) { + pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", + sechdrs[0].sh_type); + return -ENOEXEC; } - info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; + /* Validate contents are inbounds */ + for (i = 1; i < info->hdr->e_shnum; i++) { + shdr = &sechdrs[i]; + switch (shdr->sh_type) { + case SHT_NULL: + case SHT_NOBITS: + /* No contents, offset/size don't mean anything */ + continue; + default: + err = validate_section_offset(info, shdr); + if (err < 0) { + pr_err("Invalid ELF section in module (section %u type %u)\n", + i, shdr->sh_type); + return err; + } + } + } + + info->sechdrs = sechdrs; + + return 0; +} + +/** + * elf_validity_cache_secstrings() - Caches section names if valid + * @info: Load info to cache section names from. Must have valid sechdrs. + * + * Specifically checks: + * + * * Section name table index is inbounds of section headers + * * Section name table is not empty + * * Section name table is NUL terminated + * * All section name offsets are inbounds of the section + * + * Then updates @info with a &load_info->secstrings pointer if valid. 
+ * + * Return: %0 if valid, negative error code if validation failed. + */ +static int elf_validity_cache_secstrings(struct load_info *info) +{ + Elf_Shdr *strhdr, *shdr; + char *secstrings; + int i; /* * Verify if the section name table index is valid. @@ -1726,158 +1864,409 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) pr_err("Invalid ELF section name index: %d || e_shstrndx (%d) >= e_shnum (%d)\n", info->hdr->e_shstrndx, info->hdr->e_shstrndx, info->hdr->e_shnum); - goto no_exec; + return -ENOEXEC; } strhdr = &info->sechdrs[info->hdr->e_shstrndx]; - err = validate_section_offset(info, strhdr); - if (err < 0) { - pr_err("Invalid ELF section hdr(type %u)\n", strhdr->sh_type); - return err; - } /* * The section name table must be NUL-terminated, as required * by the spec. This makes strcmp and pr_* calls that access * strings in the section safe. */ - info->secstrings = (void *)info->hdr + strhdr->sh_offset; + secstrings = (void *)info->hdr + strhdr->sh_offset; if (strhdr->sh_size == 0) { pr_err("empty section name table\n"); - goto no_exec; + return -ENOEXEC; } - if (info->secstrings[strhdr->sh_size - 1] != '\0') { + if (secstrings[strhdr->sh_size - 1] != '\0') { pr_err("ELF Spec violation: section name table isn't null terminated\n"); - goto no_exec; - } - - /* - * The code assumes that section 0 has a length of zero and - * an addr of zero, so check for it. 
- */ - if (info->sechdrs[0].sh_type != SHT_NULL - || info->sechdrs[0].sh_size != 0 - || info->sechdrs[0].sh_addr != 0) { - pr_err("ELF Spec violation: section 0 type(%d)!=SH_NULL or non-zero len or addr\n", - info->sechdrs[0].sh_type); - goto no_exec; + return -ENOEXEC; } - for (i = 1; i < info->hdr->e_shnum; i++) { + for (i = 0; i < info->hdr->e_shnum; i++) { shdr = &info->sechdrs[i]; - switch (shdr->sh_type) { - case SHT_NULL: - case SHT_NOBITS: + /* SHT_NULL means sh_name has an undefined value */ + if (shdr->sh_type == SHT_NULL) continue; - case SHT_SYMTAB: - if (shdr->sh_link == SHN_UNDEF - || shdr->sh_link >= info->hdr->e_shnum) { - pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", - shdr->sh_link, shdr->sh_link, - info->hdr->e_shnum); - goto no_exec; - } - num_sym_secs++; - sym_idx = i; - fallthrough; - default: - err = validate_section_offset(info, shdr); - if (err < 0) { - pr_err("Invalid ELF section in module (section %u type %u)\n", - i, shdr->sh_type); - return err; - } - if (strcmp(info->secstrings + shdr->sh_name, - ".gnu.linkonce.this_module") == 0) { - num_mod_secs++; - mod_idx = i; - } else if (strcmp(info->secstrings + shdr->sh_name, - ".modinfo") == 0) { - num_info_secs++; - info_idx = i; - } - - if (shdr->sh_flags & SHF_ALLOC) { - if (shdr->sh_name >= strhdr->sh_size) { - pr_err("Invalid ELF section name in module (section %u type %u)\n", - i, shdr->sh_type); - return -ENOEXEC; - } - } - break; + if (shdr->sh_name >= strhdr->sh_size) { + pr_err("Invalid ELF section name in module (section %u type %u)\n", + i, shdr->sh_type); + return -ENOEXEC; } } - if (num_info_secs > 1) { + info->secstrings = secstrings; + return 0; +} + +/** + * elf_validity_cache_index_info() - Validate and cache modinfo section + * @info: Load info to populate the modinfo index on. + * Must have &load_info->sechdrs and &load_info->secstrings populated + * + * Checks that if there is a .modinfo section, it is unique. 
+ * Then, it caches its index in &load_info->index.info. + * Finally, it tries to populate the name to improve error messages. + * + * Return: %0 if valid, %-ENOEXEC if multiple modinfo sections were found. + */ +static int elf_validity_cache_index_info(struct load_info *info) +{ + int info_idx; + + info_idx = find_any_unique_sec(info, ".modinfo"); + + if (info_idx == 0) + /* Early return, no .modinfo */ + return 0; + + if (info_idx < 0) { pr_err("Only one .modinfo section must exist.\n"); - goto no_exec; - } else if (num_info_secs == 1) { - /* Try to find a name early so we can log errors with a module name */ - info->index.info = info_idx; - info->name = get_modinfo(info, "name"); + return -ENOEXEC; } - if (num_sym_secs != 1) { - pr_warn("%s: module has no symbols (stripped?)\n", - info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; - } + info->index.info = info_idx; + /* Try to find a name early so we can log errors with a module name */ + info->name = get_modinfo(info, "name"); - /* Sets internal symbols and strings. */ - info->index.sym = sym_idx; - shdr = &info->sechdrs[sym_idx]; - info->index.str = shdr->sh_link; - info->strtab = (char *)info->hdr + info->sechdrs[info->index.str].sh_offset; + return 0; +} - /* - * The ".gnu.linkonce.this_module" ELF section is special. It is - * what modpost uses to refer to __this_module and let's use rely - * on THIS_MODULE to point to &__this_module properly. The kernel's - * modpost declares it on each modules's *.mod.c file. If the struct - * module of the kernel changes a full kernel rebuild is required. 
- * - * We have a few expectaions for this special section, the following - * code validates all this for us: - * - * o Only one section must exist - * o We expect the kernel to always have to allocate it: SHF_ALLOC - * o The section size must match the kernel's run time's struct module - * size - */ - if (num_mod_secs != 1) { - pr_err("module %s: Only one .gnu.linkonce.this_module section must exist.\n", +/** + * elf_validity_cache_index_mod() - Validates and caches this_module section + * @info: Load info to cache this_module on. + * Must have &load_info->sechdrs and &load_info->secstrings populated + * + * The ".gnu.linkonce.this_module" ELF section is special. It is what modpost + * uses to refer to __this_module and lets us rely on THIS_MODULE to point + * to &__this_module properly. The kernel's modpost declares it on each + * module's *.mod.c file. If the struct module of the kernel changes a full + * kernel rebuild is required. + * + * We have a few expectations for this special section, this function + * validates all this for us: + * + * * The section has contents + * * The section is unique + * * We expect the kernel to always have to allocate it: SHF_ALLOC + * * The section size must match the kernel's run time's struct module + * size + * + * If all checks pass, the index will be cached in &load_info->index.mod + * + * Return: %0 on validation success, %-ENOEXEC on failure + */ +static int elf_validity_cache_index_mod(struct load_info *info) +{ + Elf_Shdr *shdr; + int mod_idx; + + mod_idx = find_any_unique_sec(info, ".gnu.linkonce.this_module"); + if (mod_idx <= 0) { + pr_err("module %s: Exactly one .gnu.linkonce.this_module section must exist.\n", info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; + return -ENOEXEC; } shdr = &info->sechdrs[mod_idx]; - /* - * This is already implied on the switch above, however let's be - * pedantic about it. 
- */ if (shdr->sh_type == SHT_NOBITS) { pr_err("module %s: .gnu.linkonce.this_module section must have a size set\n", info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; + return -ENOEXEC; } if (!(shdr->sh_flags & SHF_ALLOC)) { pr_err("module %s: .gnu.linkonce.this_module must occupy memory during process execution\n", info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; + return -ENOEXEC; } if (shdr->sh_size != sizeof(struct module)) { pr_err("module %s: .gnu.linkonce.this_module section size must match the kernel's built struct module size at run time\n", info->name ?: "(missing .modinfo section or name field)"); - goto no_exec; + return -ENOEXEC; } info->index.mod = mod_idx; + return 0; +} + +/** + * elf_validity_cache_index_sym() - Validate and cache symtab index + * @info: Load info to cache symtab index in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * + * Checks that there is exactly one symbol table, then caches its index in + * &load_info->index.sym. + * + * Return: %0 if valid, %-ENOEXEC on failure. + */ +static int elf_validity_cache_index_sym(struct load_info *info) +{ + unsigned int sym_idx; + unsigned int num_sym_secs = 0; + int i; + + for (i = 1; i < info->hdr->e_shnum; i++) { + if (info->sechdrs[i].sh_type == SHT_SYMTAB) { + num_sym_secs++; + sym_idx = i; + } + } + + if (num_sym_secs != 1) { + pr_warn("%s: module has no symbols (stripped?)\n", + info->name ?: "(missing .modinfo section or name field)"); + return -ENOEXEC; + } + + info->index.sym = sym_idx; + + return 0; +} + +/** + * elf_validity_cache_index_str() - Validate and cache strtab index + * @info: Load info to cache strtab index in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * Must have &load_info->index.sym populated. + * + * Looks at the symbol table's associated string table, makes sure it is + * in-bounds, and caches it. + * + * Return: %0 if valid, %-ENOEXEC on failure. 
+ */ +static int elf_validity_cache_index_str(struct load_info *info) +{ + unsigned int str_idx = info->sechdrs[info->index.sym].sh_link; + + if (str_idx == SHN_UNDEF || str_idx >= info->hdr->e_shnum) { + pr_err("Invalid ELF sh_link!=SHN_UNDEF(%d) or (sh_link(%d) >= hdr->e_shnum(%d)\n", + str_idx, str_idx, info->hdr->e_shnum); + return -ENOEXEC; + } + + info->index.str = str_idx; + return 0; +} + +/** + * elf_validity_cache_index_versions() - Validate and cache version indices + * @info: Load info to cache version indices in. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * If we're ignoring modversions based on @flags, zero all version indices + * and return validity. Otherwise check: + * + * * If "__version_ext_crcs" is present, "__version_ext_names" is present + * * There is a name present for every crc + * + * Then populate: + * + * * &load_info->index.vers + * * &load_info->index.vers_ext_crc + * * &load_info->index.vers_ext_name + * + * if present. + * + * Return: %0 if valid, %-ENOEXEC on failure. 
+ */ +static int elf_validity_cache_index_versions(struct load_info *info, int flags) +{ + unsigned int vers_ext_crc; + unsigned int vers_ext_name; + size_t crc_count; + size_t remaining_len; + size_t name_size; + char *name; + + /* If modversions were suppressed, pretend we didn't find any */ + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) { + info->index.vers = 0; + info->index.vers_ext_crc = 0; + info->index.vers_ext_name = 0; + return 0; + } + + vers_ext_crc = find_sec(info, "__version_ext_crcs"); + vers_ext_name = find_sec(info, "__version_ext_names"); + + /* If we have one field, we must have the other */ + if (!!vers_ext_crc != !!vers_ext_name) { + pr_err("extended version crc+name presence does not match"); + return -ENOEXEC; + } + + /* + * If we have extended version information, we should have the same + * number of entries in every section. + */ + if (vers_ext_crc) { + crc_count = info->sechdrs[vers_ext_crc].sh_size / sizeof(u32); + name = (void *)info->hdr + + info->sechdrs[vers_ext_name].sh_offset; + remaining_len = info->sechdrs[vers_ext_name].sh_size; + + while (crc_count--) { + name_size = strnlen(name, remaining_len) + 1; + if (name_size > remaining_len) { + pr_err("more extended version crcs than names"); + return -ENOEXEC; + } + remaining_len -= name_size; + name += name_size; + } + } + + info->index.vers = find_sec(info, "__versions"); + info->index.vers_ext_crc = vers_ext_crc; + info->index.vers_ext_name = vers_ext_name; + return 0; +} + +/** + * elf_validity_cache_index() - Resolve, validate, cache section indices + * @info: Load info to read from and update. + * &load_info->sechdrs and &load_info->secstrings must be populated. + * @flags: Load flags, relevant to suppress version loading, see + * uapi/linux/module.h + * + * Populates &load_info->index, validating as it goes. 
+ * See child functions for per-field validation: + * + * * elf_validity_cache_index_info() + * * elf_validity_cache_index_mod() + * * elf_validity_cache_index_sym() + * * elf_validity_cache_index_str() + * * elf_validity_cache_index_versions() + * + * If CONFIG_SMP is enabled, load the percpu section by name with no + * validation. + * + * Return: 0 on success, negative error code if an index failed validation. + */ +static int elf_validity_cache_index(struct load_info *info, int flags) +{ + int err; + + err = elf_validity_cache_index_info(info); + if (err < 0) + return err; + err = elf_validity_cache_index_mod(info); + if (err < 0) + return err; + err = elf_validity_cache_index_sym(info); + if (err < 0) + return err; + err = elf_validity_cache_index_str(info); + if (err < 0) + return err; + err = elf_validity_cache_index_versions(info, flags); + if (err < 0) + return err; + + info->index.pcpu = find_pcpusec(info); + + return 0; +} + +/** + * elf_validity_cache_strtab() - Validate and cache symbol string table + * @info: Load info to read from and update. + * Must have &load_info->sechdrs and &load_info->secstrings populated. + * Must have &load_info->index populated. + * + * Checks: + * + * * The string table is not empty. + * * The string table starts and ends with NUL (required by ELF spec). + * * Every &Elf_Sym->st_name offset in the symbol table is inbounds of the + * string table. + * + * And caches the pointer as &load_info->strtab in @info. + * + * Return: 0 on success, negative error code if a check failed. 
+ */ +static int elf_validity_cache_strtab(struct load_info *info) +{ + Elf_Shdr *str_shdr = &info->sechdrs[info->index.str]; + Elf_Shdr *sym_shdr = &info->sechdrs[info->index.sym]; + char *strtab = (char *)info->hdr + str_shdr->sh_offset; + Elf_Sym *syms = (void *)info->hdr + sym_shdr->sh_offset; + int i; + + if (str_shdr->sh_size == 0) { + pr_err("empty symbol string table\n"); + return -ENOEXEC; + } + if (strtab[0] != '\0') { + pr_err("symbol string table missing leading NUL\n"); + return -ENOEXEC; + } + if (strtab[str_shdr->sh_size - 1] != '\0') { + pr_err("symbol string table isn't NUL terminated\n"); + return -ENOEXEC; + } + + /* + * Now that we know strtab is correctly structured, check symbol + * starts are inbounds before they're used later. + */ + for (i = 0; i < sym_shdr->sh_size / sizeof(*syms); i++) { + if (syms[i].st_name >= str_shdr->sh_size) { + pr_err("symbol name out of bounds in string table"); + return -ENOEXEC; + } + } + + info->strtab = strtab; + return 0; +} + +/* + * Check userspace passed ELF module against our expectations, and cache + * useful variables for further processing as we go. + * + * This does basic validity checks against section offsets and sizes, the + * section name string table, and the indices used for it (sh_name). + * + * As a last step, since we're already checking the ELF sections we cache + * useful variables which will be used later for our convenience: + * + * o pointers to section headers + * o cache the modinfo symbol section + * o cache the string symbol section + * o cache the module section + * + * As a last step we set info->mod to the temporary copy of the module in + * info->hdr. The final one will be allocated in move_module(). Any + * modifications we make to our copy of the module will be carried over + * to the final minted module. 
+ */ +static int elf_validity_cache_copy(struct load_info *info, int flags) +{ + int err; + + err = elf_validity_cache_sechdrs(info); + if (err < 0) + return err; + err = elf_validity_cache_secstrings(info); + if (err < 0) + return err; + err = elf_validity_cache_index(info, flags); + if (err < 0) + return err; + err = elf_validity_cache_strtab(info); + if (err < 0) + return err; + /* This is temporary: point mod into copy of data. */ - info->mod = (void *)info->hdr + shdr->sh_offset; + info->mod = (void *)info->hdr + info->sechdrs[info->index.mod].sh_offset; /* * If we didn't load the .modinfo 'name' field earlier, fall back to @@ -1886,17 +2275,7 @@ static int elf_validity_cache_copy(struct load_info *info, int flags) if (!info->name) info->name = info->mod->name; - if (flags & MODULE_INIT_IGNORE_MODVERSIONS) - info->index.vers = 0; /* Pretend no __versions section! */ - else - info->index.vers = find_sec(info, "__versions"); - - info->index.pcpu = find_pcpusec(info); - return 0; - -no_exec: - return -ENOEXEC; } #define COPY_CHUNK_SIZE (16*PAGE_SIZE) @@ -2000,16 +2379,29 @@ static int rewrite_section_headers(struct load_info *info, int flags) /* Track but don't keep modinfo and version sections. 
*/ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_crc].sh_flags &= + ~(unsigned long)SHF_ALLOC; + info->sechdrs[info->index.vers_ext_name].sh_flags &= + ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; return 0; } +static const char *const module_license_offenders[] = { + /* driverloader was caught wrongly pretending to be under GPL */ + "driverloader", + + /* lve claims to be GPL but upstream won't provide source */ + "lve", +}; + /* * These calls taint the kernel depending certain module circumstances */ static void module_augment_kernel_taints(struct module *mod, struct load_info *info) { int prev_taint = test_taint(TAINT_PROPRIETARY_MODULE); + size_t i; if (!get_modinfo(info, "intree")) { if (!test_taint(TAINT_OOT_MODULE)) @@ -2058,15 +2450,11 @@ static void module_augment_kernel_taints(struct module *mod, struct load_info *i if (strcmp(mod->name, "ndiswrapper") == 0) add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE); - /* driverloader was caught wrongly pretending to be under GPL */ - if (strcmp(mod->name, "driverloader") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE, - LOCKDEP_NOW_UNRELIABLE); - - /* lve claims to be GPL but upstream won't provide source */ - if (strcmp(mod->name, "lve") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE, - LOCKDEP_NOW_UNRELIABLE); + for (i = 0; i < ARRAY_SIZE(module_license_offenders); ++i) { + if (strcmp(mod->name, module_license_offenders[i]) == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE, + LOCKDEP_NOW_UNRELIABLE); + } if (!prev_taint && test_taint(TAINT_PROPRIETARY_MODULE)) pr_warn("%s: module license taints kernel.\n", mod->name); @@ -2148,6 +2536,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size); + mod->btf_base_data = 
any_section_objs(info, ".BTF.base", 1, + &mod->btf_base_data_size); #endif #ifdef CONFIG_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", @@ -2225,35 +2615,21 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { int i; - void *ptr; enum mod_mem_type t = 0; int ret = -ENOMEM; + bool codetag_section_found = false; for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL; continue; } - mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size); - ptr = module_memory_alloc(mod->mem[type].size, type); - /* - * The pointer to these blocks of memory are stored on the module - * structure and we keep that around so long as the module is - * around. We only free that memory when we unload the module. - * Just mark them as not being a leak then. The .init* ELF - * sections *do* get freed after boot so we *could* treat them - * slightly differently with kmemleak_ignore() and only grey - * them out as they work as typical memory allocations which - * *do* eventually get freed, but let's just keep things simple - * and avoid *any* false positives. - */ - kmemleak_not_leak(ptr); - if (!ptr) { + + ret = module_memory_alloc(mod, type); + if (ret) { t = type; - goto out_enomem; + goto out_err; } - memset(ptr, 0, mod->mem[type].size); - mod->mem[type].base = ptr; } /* Transfer each section which specifies SHF_ALLOC */ @@ -2261,12 +2637,34 @@ static int move_module(struct module *mod, struct load_info *info) for (i = 0; i < info->hdr->e_shnum; i++) { void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; - enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + const char *sname; if (!(shdr->sh_flags & SHF_ALLOC)) continue; - dest = mod->mem[type].base + (shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK); + sname = info->secstrings + shdr->sh_name; + /* + * Load codetag sections separately as they might still be used + * after module unload. 
+ */ + if (codetag_needs_module_section(mod, sname, shdr->sh_size)) { + dest = codetag_alloc_module_section(mod, sname, shdr->sh_size, + arch_mod_section_prepend(mod, i), shdr->sh_addralign); + if (WARN_ON(!dest)) { + ret = -EINVAL; + goto out_err; + } + if (IS_ERR(dest)) { + ret = PTR_ERR(dest); + goto out_err; + } + codetag_section_found = true; + } else { + enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; + unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK; + + dest = mod->mem[type].base + offset; + } if (shdr->sh_type != SHT_NOBITS) { /* @@ -2278,7 +2676,7 @@ static int move_module(struct module *mod, struct load_info *info) if (i == info->index.mod && (WARN_ON_ONCE(shdr->sh_size != sizeof(struct module)))) { ret = -ENOEXEC; - goto out_enomem; + goto out_err; } memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size); } @@ -2294,9 +2692,13 @@ static int move_module(struct module *mod, struct load_info *info) } return 0; -out_enomem: +out_err: + module_memory_restore_rox(mod); for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod, t); + if (codetag_section_found) + codetag_free_module_sections(mod); + return ret; } @@ -2417,6 +2819,8 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) /* Module has been copied to its final place now: return it. 
*/ mod = (void *)info->sechdrs[info->index.mod].sh_addr; kmemleak_load_module(mod, info); + codetag_module_replaced(info->mod, mod); + return mod; } @@ -2425,6 +2829,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); + codetag_free_module_sections(mod); free_mod_mem(mod); } @@ -2482,9 +2887,9 @@ static void do_free_init(struct work_struct *w) llist_for_each_safe(pos, n, list) { initfree = container_of(pos, struct mod_initfree, node); - module_memfree(initfree->init_text); - module_memfree(initfree->init_data); - module_memfree(initfree->init_rodata); + execmem_free(initfree->init_text); + execmem_free(initfree->init_data); + execmem_free(initfree->init_rodata); kfree(initfree); } } @@ -2576,9 +2981,12 @@ static noinline int do_init_module(struct module *mod) /* Switch to core kallsyms now init is done: kallsyms may be walking! */ rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif - ret = module_enable_rodata_ro(mod, true); + ret = module_enable_rodata_ro_after_init(mod); if (ret) - goto fail_mutex_unlock; + pr_warn("%s: module_enable_rodata_ro_after_init() returned %d, " + "ro_after_init data might still be writable\n", + mod->name, ret); + mod_tree_remove_init(mod); module_arch_freeing_init(mod); for_class_mod_mem_type(type, init) { @@ -2587,17 +2995,18 @@ static noinline int do_init_module(struct module *mod) } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES - /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointer */ + /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointers */ mod->btf_data = NULL; + mod->btf_base_data = NULL; #endif /* * We want to free module_init, but be aware that kallsyms may be - * walking this with preempt disabled. In all the failure paths, we + * walking this within an RCU read section. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success - * path. 
module_memfree() cannot be called in an interrupt, so do the + * path. execmem_free() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. * - * Note that module_alloc() on most architectures creates W+X page + * Note that execmem_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work by invoking @@ -2616,8 +3025,6 @@ static noinline int do_init_module(struct module *mod) return 0; -fail_mutex_unlock: - mutex_unlock(&module_mutex); fail_free_freeinit: kfree(freeinit); fail: @@ -2745,7 +3152,7 @@ static int complete_formation(struct module *mod, struct load_info *info) module_bug_finalize(info->hdr, info->sechdrs, mod); module_cfi_finalize(info->hdr, info->sechdrs, mod); - err = module_enable_rodata_ro(mod, false); + err = module_enable_rodata_ro(mod); if (err) goto out_strict_rwx; err = module_enable_data_nx(mod); @@ -2995,6 +3402,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Get rid of temporary copy. */ free_copy(info, flags); + codetag_load_module(mod); + /* Done! 
*/ trace_module_load(mod); @@ -3042,6 +3451,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod->mem[type].size); } + module_memory_restore_rox(mod); module_deallocate(mod, info); free_copy: /* @@ -3096,7 +3506,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) struct idempotent *existing; bool first; - u->ret = 0; + u->ret = -EINTR; u->cookie = cookie; init_completion(&u->complete); @@ -3132,7 +3542,7 @@ static int idempotent_complete(struct idempotent *u, int ret) hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; - hlist_del(&pos->entry); + hlist_del_init(&pos->entry); pos->ret = ret; complete(&pos->complete); } @@ -3140,6 +3550,28 @@ static int idempotent_complete(struct idempotent *u, int ret) return ret; } +/* + * Wait for the idempotent worker. + * + * If we get interrupted, we need to remove ourselves from the + * the idempotent list, and the completion may still come in. + * + * The 'idem_lock' protects against the race, and 'idem.ret' was + * initialized to -EINTR and is thus always the right return + * value even if the idempotent work then completes between + * the wait_for_completion and the cleanup. + */ +static int idempotent_wait_for_completion(struct idempotent *u) +{ + if (wait_for_completion_interruptible(&u->complete)) { + spin_lock(&idem_lock); + if (!hlist_unhashed(&u->entry)) + hlist_del(&u->entry); + spin_unlock(&idem_lock); + } + return u->ret; +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; @@ -3172,26 +3604,24 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int { struct idempotent idem; - if (!f || !(f->f_mode & FMODE_READ)) + if (!(f->f_mode & FMODE_READ)) return -EBADF; - /* See if somebody else is doing the operation? 
*/ - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; + /* Are we the winners of the race and get to do this? */ + if (!idempotent(&idem, file_inode(f))) { + int ret = init_module_from_file(f, uargs, flags); + return idempotent_complete(&idem, ret); } - /* Otherwise, we'll do it and complete others */ - return idempotent_complete(&idem, - init_module_from_file(f, uargs, flags)); + /* + * Somebody else won the race and is loading the module. + */ + return idempotent_wait_for_completion(&idem); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { - int err; - struct fd f; - - err = may_init_module(); + int err = may_init_module(); if (err) return err; @@ -3202,10 +3632,10 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) |MODULE_INIT_COMPRESSED_FILE)) return -EINVAL; - f = fdget(fd); - err = idempotent_init_module(f.file, uargs, flags); - fdput(f); - return err; + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; + return idempotent_init_module(fd_file(f), uargs, flags); } /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */ @@ -3238,28 +3668,23 @@ out: /* Given an address, look for it in the module exception tables. */ const struct exception_table_entry *search_module_extables(unsigned long addr) { - const struct exception_table_entry *e = NULL; struct module *mod; - preempt_disable(); + guard(rcu)(); mod = __module_address(addr); if (!mod) - goto out; + return NULL; if (!mod->num_exentries) - goto out; - - e = search_extable(mod->extable, - mod->num_exentries, - addr); -out: - preempt_enable(); - + return NULL; /* - * Now, if we found one, we are running inside it now, hence - * we cannot unload the module, hence no refcnt needed. + * The address passed here belongs to a module that is currently + * invoked (we are running inside it). Therefore its module::refcnt + * needs already be >0 to ensure that it is not removed at this stage. 
+ * All other user need to invoke this function within a RCU read + * section. */ - return e; + return search_extable(mod->extable, mod->num_exentries, addr); } /** @@ -3271,20 +3696,15 @@ out: */ bool is_module_address(unsigned long addr) { - bool ret; - - preempt_disable(); - ret = __module_address(addr) != NULL; - preempt_enable(); - - return ret; + guard(rcu)(); + return __module_address(addr) != NULL; } /** * __module_address() - get the module which contains an address. * @addr: the address. * - * Must be called with preempt disabled or module mutex held so that + * Must be called within RCU read section or module mutex held so that * module doesn't get freed during this. */ struct module *__module_address(unsigned long addr) @@ -3302,8 +3722,6 @@ struct module *__module_address(unsigned long addr) return NULL; lookup: - module_assert_mutex_or_preempt(); - mod = mod_find(addr, &mod_tree); if (mod) { BUG_ON(!within_module(addr, mod)); @@ -3323,20 +3741,28 @@ lookup: */ bool is_module_text_address(unsigned long addr) { - bool ret; + guard(rcu)(); + return __module_text_address(addr) != NULL; +} - preempt_disable(); - ret = __module_text_address(addr) != NULL; - preempt_enable(); +void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data) +{ + struct module *mod; - return ret; + guard(rcu)(); + list_for_each_entry_rcu(mod, &modules, list) { + if (mod->state == MODULE_STATE_UNFORMED) + continue; + if (func(mod, data)) + break; + } } /** * __module_text_address() - get the module whose code contains an address. * @addr: the address. * - * Must be called with preempt disabled or module mutex held so that + * Must be called within RCU read section or module mutex held so that * module doesn't get freed during this. 
*/ struct module *__module_text_address(unsigned long addr) @@ -3359,7 +3785,7 @@ void print_modules(void) printk(KERN_DEFAULT "Modules linked in:"); /* Most callers should already have preempt disabled, but make sure */ - preempt_disable(); + guard(rcu)(); list_for_each_entry_rcu(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -3367,7 +3793,6 @@ void print_modules(void) } print_unloaded_tainted_modules(); - preempt_enable(); if (last_unloaded_module.name[0]) pr_cont(" [last unloaded: %s%s]", last_unloaded_module.name, last_unloaded_module.taints); diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index c45caa4690e5..03f4142cfbf4 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include "internal.h" static int module_set_memory(const struct module *mod, enum mod_mem_type type, @@ -32,9 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type, int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) { + const struct module_memory *mem = &mod->mem[type]; int ret; - if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + if (mem->is_rox) + ret = execmem_restore_rox(mem->base, mem->size); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox); else ret = module_set_memory(mod, type, set_memory_x); @@ -44,7 +48,7 @@ int module_enable_text_rox(const struct module *mod) return 0; } -int module_enable_rodata_ro(const struct module *mod, bool after_init) +int module_enable_rodata_ro(const struct module *mod) { int ret; @@ -58,12 +62,17 @@ int module_enable_rodata_ro(const struct module *mod, bool after_init) if (ret) return ret; - if (after_init) - return module_set_memory(mod, MOD_RO_AFTER_INIT, set_memory_ro); - return 0; } +int module_enable_rodata_ro_after_init(const struct module 
*mod) +{ + if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX) || !rodata_enabled) + return 0; + + return module_set_memory(mod, MOD_RO_AFTER_INIT, set_memory_ro); +} + int module_enable_data_nx(const struct module *mod) { if (!IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c index d964167c6658..b401ff4b02d2 100644 --- a/kernel/module/sysfs.c +++ b/kernel/module/sysfs.c @@ -19,24 +19,16 @@ * J. Corbet <corbet@lwn.net> */ #ifdef CONFIG_KALLSYMS -struct module_sect_attr { - struct bin_attribute battr; - unsigned long address; -}; - struct module_sect_attrs { struct attribute_group grp; - unsigned int nsections; - struct module_sect_attr attrs[]; + struct bin_attribute attrs[]; }; #define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4)) static ssize_t module_sect_read(struct file *file, struct kobject *kobj, - struct bin_attribute *battr, + const struct bin_attribute *battr, char *buf, loff_t pos, size_t count) { - struct module_sect_attr *sattr = - container_of(battr, struct module_sect_attr, battr); char bounce[MODULE_SECT_READ_SIZE + 1]; size_t wrote; @@ -53,7 +45,7 @@ static ssize_t module_sect_read(struct file *file, struct kobject *kobj, */ wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n", kallsyms_show_value(file->f_cred) - ? (void *)sattr->address : NULL); + ? 
battr->private : NULL); count = min(count, wrote); memcpy(buf, bounce, count); @@ -62,64 +54,69 @@ static ssize_t module_sect_read(struct file *file, struct kobject *kobj, static void free_sect_attrs(struct module_sect_attrs *sect_attrs) { - unsigned int section; + const struct bin_attribute *const *bin_attr; - for (section = 0; section < sect_attrs->nsections; section++) - kfree(sect_attrs->attrs[section].battr.attr.name); + for (bin_attr = sect_attrs->grp.bin_attrs_new; *bin_attr; bin_attr++) + kfree((*bin_attr)->attr.name); + kfree(sect_attrs->grp.bin_attrs_new); kfree(sect_attrs); } -static void add_sect_attrs(struct module *mod, const struct load_info *info) +static int add_sect_attrs(struct module *mod, const struct load_info *info) { - unsigned int nloaded = 0, i, size[2]; struct module_sect_attrs *sect_attrs; - struct module_sect_attr *sattr; - struct bin_attribute **gattr; + const struct bin_attribute **gattr; + struct bin_attribute *sattr; + unsigned int nloaded = 0, i; + int ret; /* Count loaded sections and allocate structures */ for (i = 0; i < info->hdr->e_shnum; i++) if (!sect_empty(&info->sechdrs[i])) nloaded++; - size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded), - sizeof(sect_attrs->grp.bin_attrs[0])); - size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]); - sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL); + sect_attrs = kzalloc(struct_size(sect_attrs, attrs, nloaded), GFP_KERNEL); if (!sect_attrs) - return; + return -ENOMEM; + + gattr = kcalloc(nloaded + 1, sizeof(*gattr), GFP_KERNEL); + if (!gattr) { + kfree(sect_attrs); + return -ENOMEM; + } /* Setup section attributes. 
*/ sect_attrs->grp.name = "sections"; - sect_attrs->grp.bin_attrs = (void *)sect_attrs + size[0]; + sect_attrs->grp.bin_attrs_new = gattr; - sect_attrs->nsections = 0; sattr = &sect_attrs->attrs[0]; - gattr = &sect_attrs->grp.bin_attrs[0]; for (i = 0; i < info->hdr->e_shnum; i++) { Elf_Shdr *sec = &info->sechdrs[i]; if (sect_empty(sec)) continue; - sysfs_bin_attr_init(&sattr->battr); - sattr->address = sec->sh_addr; - sattr->battr.attr.name = + sysfs_bin_attr_init(sattr); + sattr->attr.name = kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL); - if (!sattr->battr.attr.name) + if (!sattr->attr.name) { + ret = -ENOMEM; goto out; - sect_attrs->nsections++; - sattr->battr.read = module_sect_read; - sattr->battr.size = MODULE_SECT_READ_SIZE; - sattr->battr.attr.mode = 0400; - *(gattr++) = &(sattr++)->battr; + } + sattr->read_new = module_sect_read; + sattr->private = (void *)sec->sh_addr; + sattr->size = MODULE_SECT_READ_SIZE; + sattr->attr.mode = 0400; + *(gattr++) = sattr++; } - *gattr = NULL; - if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp)) + ret = sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp); + if (ret) goto out; mod->sect_attrs = sect_attrs; - return; + return 0; out: free_sect_attrs(sect_attrs); + return ret; } static void remove_sect_attrs(struct module *mod) @@ -141,43 +138,23 @@ static void remove_sect_attrs(struct module *mod) */ struct module_notes_attrs { - struct kobject *dir; - unsigned int notes; - struct bin_attribute attrs[] __counted_by(notes); + struct attribute_group grp; + struct bin_attribute attrs[]; }; -static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) +static void free_notes_attrs(struct module_notes_attrs *notes_attrs) { - /* - * The caller checked the pos and count against our size. 
- */ - memcpy(buf, bin_attr->private + pos, count); - return count; -} - -static void free_notes_attrs(struct module_notes_attrs *notes_attrs, - unsigned int i) -{ - if (notes_attrs->dir) { - while (i-- > 0) - sysfs_remove_bin_file(notes_attrs->dir, - &notes_attrs->attrs[i]); - kobject_put(notes_attrs->dir); - } + kfree(notes_attrs->grp.bin_attrs_new); kfree(notes_attrs); } -static void add_notes_attrs(struct module *mod, const struct load_info *info) +static int add_notes_attrs(struct module *mod, const struct load_info *info) { unsigned int notes, loaded, i; struct module_notes_attrs *notes_attrs; + const struct bin_attribute **gattr; struct bin_attribute *nattr; - - /* failed to create section attributes, so can't create notes */ - if (!mod->sect_attrs) - return; + int ret; /* Count notes sections and allocate structures. */ notes = 0; @@ -187,56 +164,74 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) ++notes; if (notes == 0) - return; + return 0; notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes), GFP_KERNEL); if (!notes_attrs) - return; + return -ENOMEM; + + gattr = kcalloc(notes + 1, sizeof(*gattr), GFP_KERNEL); + if (!gattr) { + kfree(notes_attrs); + return -ENOMEM; + } + + notes_attrs->grp.name = "notes"; + notes_attrs->grp.bin_attrs_new = gattr; - notes_attrs->notes = notes; nattr = &notes_attrs->attrs[0]; for (loaded = i = 0; i < info->hdr->e_shnum; ++i) { if (sect_empty(&info->sechdrs[i])) continue; if (info->sechdrs[i].sh_type == SHT_NOTE) { sysfs_bin_attr_init(nattr); - nattr->attr.name = mod->sect_attrs->attrs[loaded].battr.attr.name; + nattr->attr.name = mod->sect_attrs->attrs[loaded].attr.name; nattr->attr.mode = 0444; nattr->size = info->sechdrs[i].sh_size; nattr->private = (void *)info->sechdrs[i].sh_addr; - nattr->read = module_notes_read; - ++nattr; + nattr->read_new = sysfs_bin_attr_simple_read; + *(gattr++) = nattr++; } ++loaded; } - notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj); - if 
(!notes_attrs->dir) + ret = sysfs_create_group(&mod->mkobj.kobj, &notes_attrs->grp); + if (ret) goto out; - for (i = 0; i < notes; ++i) - if (sysfs_create_bin_file(notes_attrs->dir, - &notes_attrs->attrs[i])) - goto out; - mod->notes_attrs = notes_attrs; - return; + return 0; out: - free_notes_attrs(notes_attrs, i); + free_notes_attrs(notes_attrs); + return ret; } static void remove_notes_attrs(struct module *mod) { - if (mod->notes_attrs) - free_notes_attrs(mod->notes_attrs, mod->notes_attrs->notes); + if (mod->notes_attrs) { + sysfs_remove_group(&mod->mkobj.kobj, + &mod->notes_attrs->grp); + /* + * We are positive that no one is using any notes attrs + * at this point. Deallocate immediately. + */ + free_notes_attrs(mod->notes_attrs); + mod->notes_attrs = NULL; + } } #else /* !CONFIG_KALLSYMS */ -static inline void add_sect_attrs(struct module *mod, const struct load_info *info) { } +static inline int add_sect_attrs(struct module *mod, const struct load_info *info) +{ + return 0; +} static inline void remove_sect_attrs(struct module *mod) { } -static inline void add_notes_attrs(struct module *mod, const struct load_info *info) { } +static inline int add_notes_attrs(struct module *mod, const struct load_info *info) +{ + return 0; +} static inline void remove_notes_attrs(struct module *mod) { } #endif /* CONFIG_KALLSYMS */ @@ -274,7 +269,7 @@ static int add_usage_links(struct module *mod) static void module_remove_modinfo_attrs(struct module *mod, int end) { - struct module_attribute *attr; + const struct module_attribute *attr; int i; for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) { @@ -292,7 +287,7 @@ static void module_remove_modinfo_attrs(struct module *mod, int end) static int module_add_modinfo_attrs(struct module *mod) { - struct module_attribute *attr; + const struct module_attribute *attr; struct module_attribute *temp_attr; int error = 0; int i; @@ -396,11 +391,20 @@ int mod_sysfs_setup(struct module *mod, if (err) goto out_unreg_modinfo_attrs; - 
add_sect_attrs(mod, info); - add_notes_attrs(mod, info); + err = add_sect_attrs(mod, info); + if (err) + goto out_del_usage_links; + + err = add_notes_attrs(mod, info); + if (err) + goto out_unreg_sect_attrs; return 0; +out_unreg_sect_attrs: + remove_sect_attrs(mod); +out_del_usage_links: + del_usage_links(mod); out_unreg_modinfo_attrs: module_remove_modinfo_attrs(mod, -1); out_unreg_param: diff --git a/kernel/module/tracking.c b/kernel/module/tracking.c index 16742d1c630c..4fefec5b683c 100644 --- a/kernel/module/tracking.c +++ b/kernel/module/tracking.c @@ -21,8 +21,6 @@ int try_add_tainted_module(struct module *mod) { struct mod_unload_taint *mod_taint; - module_assert_mutex_or_preempt(); - if (!mod->taints) goto out; diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c index 277197977d43..d3204c5c74eb 100644 --- a/kernel/module/tree_lookup.c +++ b/kernel/module/tree_lookup.c @@ -12,11 +12,11 @@ /* * Use a latched RB-tree for __module_address(); this allows us to use - * RCU-sched lookups of the address from any context. + * RCU lookups of the address from any context. * - * This is conditional on PERF_EVENTS || TRACING because those can really hit - * __module_address() hard by doing a lot of stack unwinding; potentially from - * NMI context. + * This is conditional on PERF_EVENTS || TRACING || CFI_CLANG because those can + * really hit __module_address() hard by doing a lot of stack unwinding; + * potentially from NMI context. 
*/ static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) diff --git a/kernel/module/version.c b/kernel/module/version.c index 53f43ac5a73e..2beefeba82d9 100644 --- a/kernel/module/version.c +++ b/kernel/module/version.c @@ -13,17 +13,34 @@ int check_version(const struct load_info *info, const char *symname, struct module *mod, - const s32 *crc) + const u32 *crc) { Elf_Shdr *sechdrs = info->sechdrs; unsigned int versindex = info->index.vers; unsigned int i, num_versions; struct modversion_info *versions; + struct modversion_info_ext version_ext; /* Exporting module didn't supply crcs? OK, we're already tainted. */ if (!crc) return 1; + /* If we have extended version info, rely on it */ + if (info->index.vers_ext_crc) { + for_each_modversion_info_ext(version_ext, info) { + if (strcmp(version_ext.name, symname) != 0) + continue; + if (*version_ext.crc == *crc) + return 1; + pr_debug("Found checksum %X vs module %X\n", + *crc, *version_ext.crc); + goto bad_version; + } + pr_warn_once("%s: no extended symbol version for %s\n", + info->name, symname); + return 1; + } + /* No versions at all? modprobe --force does this. */ if (versindex == 0) return try_to_force_load(mod, symname) == 0; @@ -62,17 +79,17 @@ int check_modstruct_version(const struct load_info *info, .name = "module_layout", .gplok = true, }; + bool have_symbol; /* * Since this should be found in kernel (which can't be removed), no - * locking is necessary -- use preempt_disable() to placate lockdep. + * locking is necessary. Regardless use a RCU read section to keep + * lockdep happy. 
*/ - preempt_disable(); - if (!find_symbol(&fsa)) { - preempt_enable(); - BUG(); - } - preempt_enable(); + scoped_guard(rcu) + have_symbol = find_symbol(&fsa); + BUG_ON(!have_symbol); + return check_version(info, "module_layout", mod, fsa.crc); } @@ -87,6 +104,34 @@ int same_magic(const char *amagic, const char *bmagic, return strcmp(amagic, bmagic) == 0; } +void modversion_ext_start(const struct load_info *info, + struct modversion_info_ext *start) +{ + unsigned int crc_idx = info->index.vers_ext_crc; + unsigned int name_idx = info->index.vers_ext_name; + Elf_Shdr *sechdrs = info->sechdrs; + + /* + * Both of these fields are needed for this to be useful + * Any future fields should be initialized to NULL if absent. + */ + if (crc_idx == 0 || name_idx == 0) { + start->remaining = 0; + return; + } + + start->crc = (const u32 *)sechdrs[crc_idx].sh_addr; + start->name = (const char *)sechdrs[name_idx].sh_addr; + start->remaining = sechdrs[crc_idx].sh_size / sizeof(*start->crc); +} + +void modversion_ext_advance(struct modversion_info_ext *vers) +{ + vers->remaining--; + vers->crc++; + vers->name += strlen(vers->name) + 1; +} + /* * Generate the signature for all relevant module structures here. * If these change, we don't want to try to parse the module. |