diff options
Diffstat (limited to 'kernel')
43 files changed, 1469 insertions, 1763 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index f0902a7bd1b3..f2cc0d118a0b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -130,7 +130,7 @@ $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz - cmd_genikh = $(BASH) $(srctree)/kernel/gen_kheaders.sh $@ + cmd_genikh = $(CONFIG_SHELL) $(srctree)/kernel/gen_kheaders.sh $@ $(obj)/kheaders_data.tar.xz: FORCE $(call cmd,genikh) diff --git a/kernel/audit.c b/kernel/audit.c index da8dc0db5bd3..8e09f0f55b4b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -830,7 +830,7 @@ static int kauditd_thread(void *dummy) rc = kauditd_send_queue(sk, portid, &audit_hold_queue, UNICAST_RETRIES, NULL, kauditd_rehold_skb); - if (ac && rc < 0) { + if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; @@ -840,7 +840,7 @@ static int kauditd_thread(void *dummy) rc = kauditd_send_queue(sk, portid, &audit_retry_queue, UNICAST_RETRIES, NULL, kauditd_hold_skb); - if (ac && rc < 0) { + if (rc < 0) { sk = NULL; auditd_reset(ac); goto main_queue; @@ -2155,18 +2155,19 @@ void audit_log_task_info(struct audit_buffer *ab) EXPORT_SYMBOL(audit_log_task_info); /** - * audit_log_link_denied - report a link restriction denial - * @operation: specific link operation + * audit_log_path_denied - report a path restriction denial + * @type: audit message type (AUDIT_ANOM_LINK, AUDIT_ANOM_CREAT, etc) + * @operation: specific operation name */ -void audit_log_link_denied(const char *operation) +void audit_log_path_denied(int type, const char *operation) { struct audit_buffer *ab; if (!audit_enabled || audit_dummy_context()) return; - /* Generate AUDIT_ANOM_LINK with subject, operation, outcome. */ - ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_ANOM_LINK); + /* Generate log with subject, operation, outcome. */ + ab = audit_log_start(audit_context(), GFP_KERNEL, type); if (!ab) return; audit_log_format(ab, "op=%s", operation); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index caca752ee5e6..3f958b90d914 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -289,7 +289,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, if (irqs_disabled()) { work = this_cpu_ptr(&up_read_work); - if (work->irq_work.flags & IRQ_WORK_BUSY) + if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) /* cannot queue more up_read, fallback */ irq_work_busy = true; } diff --git a/kernel/compat.c b/kernel/compat.c index a2bc1d6ceb57..95005f849c68 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -90,30 +90,6 @@ int compat_put_timespec(const struct timespec *ts, void __user *uts) } EXPORT_SYMBOL_GPL(compat_put_timespec); -int get_compat_itimerval(struct itimerval *o, const struct compat_itimerval __user *i) -{ - struct compat_itimerval v32; - - if (copy_from_user(&v32, i, sizeof(struct compat_itimerval))) - return -EFAULT; - o->it_interval.tv_sec = v32.it_interval.tv_sec; - o->it_interval.tv_usec = v32.it_interval.tv_usec; - o->it_value.tv_sec = v32.it_value.tv_sec; - o->it_value.tv_usec = v32.it_value.tv_usec; - return 0; -} - -int put_compat_itimerval(struct compat_itimerval __user *o, const struct itimerval *i) -{ - struct compat_itimerval v32; - - v32.it_interval.tv_sec = i->it_interval.tv_sec; - v32.it_interval.tv_usec = i->it_interval.tv_usec; - v32.it_value.tv_sec = i->it_value.tv_sec; - v32.it_value.tv_usec = i->it_value.tv_usec; - return copy_to_user(o, &v32, sizeof(struct compat_itimerval)) ? -EFAULT : 0; -} - #ifdef __ARCH_WANT_SYS_SIGPROCMASK /* diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index f76d6f77dd5e..2b7c9b67931d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -441,6 +441,37 @@ setundefined: return 0; } +#ifdef CONFIG_KGDB_KDB +void kdb_dump_stack_on_cpu(int cpu) +{ + if (cpu == raw_smp_processor_id() || !IS_ENABLED(CONFIG_SMP)) { + dump_stack(); + return; + } + + if (!(kgdb_info[cpu].exception_state & DCPU_IS_SLAVE)) { + kdb_printf("ERROR: Task on cpu %d didn't stop in the debugger\n", + cpu); + return; + } + + /* + * In general, architectures don't support dumping the stack of a + * "running" process that's not the current one. From the point of + * view of the Linux, kernel processes that are looping in the kgdb + * slave loop are still "running". There's also no API (that actually + * works across all architectures) that can do a stack crawl based + * on registers passed as a parameter. + * + * Solve this conundrum by asking slave CPUs to do the backtrace + * themselves. + */ + kgdb_info[cpu].exception_state |= DCPU_WANT_BT; + while (kgdb_info[cpu].exception_state & DCPU_WANT_BT) + cpu_relax(); +} +#endif + /* * Return true if there is a valid kgdb I/O module. Also if no * debugger is attached a message can be printed to the console about @@ -580,6 +611,9 @@ cpu_loop: atomic_xchg(&kgdb_active, cpu); break; } + } else if (kgdb_info[cpu].exception_state & DCPU_WANT_BT) { + dump_stack(); + kgdb_info[cpu].exception_state &= ~DCPU_WANT_BT; } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { if (!raw_spin_is_locked(&dbg_slave_lock)) goto return_normal; diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index b4a7c326d546..cd22b5f68831 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h @@ -33,7 +33,7 @@ struct kgdb_state { #define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */ #define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */ #define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */ -#define DCPU_SSTEP 0x8 /* CPU is single stepping */ +#define DCPU_WANT_BT 0x8 /* Slave cpu should backtrace then clear flag */ struct debuggerinfo_struct { void *debuggerinfo; @@ -76,6 +76,7 @@ extern int kdb_stub(struct kgdb_state *ks); extern int kdb_parse(const char *cmdstr); extern int kdb_common_init_state(struct kgdb_state *ks); extern int kdb_common_deinit_state(void); +extern void kdb_dump_stack_on_cpu(int cpu); #else /* ! CONFIG_KGDB_KDB */ static inline int kdb_stub(struct kgdb_state *ks) { diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 7e2379aa0a1e..4af48ac53625 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -22,20 +22,15 @@ static void kdb_show_stack(struct task_struct *p, void *addr) { int old_lvl = console_loglevel; + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; kdb_trap_printk++; - kdb_set_current_task(p); - if (addr) { - show_stack((struct task_struct *)p, addr); - } else if (kdb_current_regs) { -#ifdef CONFIG_X86 - show_stack(p, &kdb_current_regs->sp); -#else - show_stack(p, NULL); -#endif - } else { - show_stack(p, NULL); - } + + if (!addr && kdb_task_has_cpu(p)) + kdb_dump_stack_on_cpu(kdb_process_cpu(p)); + else + show_stack(p, addr); + console_loglevel = old_lvl; kdb_trap_printk--; } @@ -78,12 +73,12 @@ static void kdb_show_stack(struct task_struct *p, void *addr) */ static int -kdb_bt1(struct task_struct *p, unsigned long mask, - int argcount, int btaprompt) +kdb_bt1(struct task_struct *p, unsigned long mask, bool btaprompt) { - char buffer[2]; - if (kdb_getarea(buffer[0], (unsigned long)p) || - kdb_getarea(buffer[0], (unsigned long)(p+1)-1)) + char ch; + + if (kdb_getarea(ch, (unsigned long)p) || + kdb_getarea(ch, (unsigned long)(p+1)-1)) return KDB_BADADDR; if (!kdb_task_state(p, mask)) return 0; @@ -91,22 +86,47 @@ kdb_bt1(struct task_struct *p, unsigned long mask, kdb_ps1(p); kdb_show_stack(p, NULL); if (btaprompt) { - kdb_getstr(buffer, sizeof(buffer), - "Enter <q> to end, <cr> to continue:"); - if (buffer[0] == 'q') { - kdb_printf("\n"); + kdb_printf("Enter <q> to end, <cr> or <space> to continue:"); + do { + ch = kdb_getchar(); + } while (!strchr("\r\n q", ch)); + kdb_printf("\n"); + + /* reset the pager */ + kdb_nextline = 1; + + if (ch == 'q') return 1; - } } touch_nmi_watchdog(); return 0; } +static void +kdb_bt_cpu(unsigned long cpu) +{ + struct task_struct *kdb_tsk; + + if (cpu >= num_possible_cpus() || !cpu_online(cpu)) { + kdb_printf("WARNING: no process for cpu %ld\n", cpu); + return; + } + + /* If a CPU failed to round up we could be here */ + kdb_tsk = KDB_TSK(cpu); + if (!kdb_tsk) { + kdb_printf("WARNING: no task for cpu %ld\n", cpu); + return; + } + + kdb_set_current_task(kdb_tsk); + kdb_bt1(kdb_tsk, ~0UL, false); +} + int kdb_bt(int argc, const char **argv) { int diag; - int argcount = 5; int btaprompt = 1; int nextarg; unsigned long addr; @@ -125,7 +145,7 @@ kdb_bt(int argc, const char **argv) /* Run the active tasks first */ for_each_online_cpu(cpu) { p = kdb_curr_task(cpu); - if (kdb_bt1(p, mask, argcount, btaprompt)) + if (kdb_bt1(p, mask, btaprompt)) return 0; } /* Now the inactive tasks */ @@ -134,7 +154,7 @@ kdb_bt(int argc, const char **argv) return 0; if (task_curr(p)) continue; - if (kdb_bt1(p, mask, argcount, btaprompt)) + if (kdb_bt1(p, mask, btaprompt)) return 0; } kdb_while_each_thread(g, p); } else if (strcmp(argv[0], "btp") == 0) { @@ -148,7 +168,7 @@ kdb_bt(int argc, const char **argv) p = find_task_by_pid_ns(pid, &init_pid_ns); if (p) { kdb_set_current_task(p); - return kdb_bt1(p, ~0UL, argcount, 0); + return kdb_bt1(p, ~0UL, false); } kdb_printf("No process with pid == %ld found\n", pid); return 0; @@ -159,11 +179,10 @@ kdb_bt(int argc, const char **argv) if (diag) return diag; kdb_set_current_task((struct task_struct *)addr); - return kdb_bt1((struct task_struct *)addr, ~0UL, argcount, 0); + return kdb_bt1((struct task_struct *)addr, ~0UL, false); } else if (strcmp(argv[0], "btc") == 0) { unsigned long cpu = ~0; struct task_struct *save_current_task = kdb_current_task; - char buf[80]; if (argc > 1) return KDB_ARGCOUNT; if (argc == 1) { @@ -171,35 +190,22 @@ kdb_bt(int argc, const char **argv) if (diag) return diag; } - /* Recursive use of kdb_parse, do not use argv after - * this point */ - argv = NULL; if (cpu != ~0) { - if (cpu >= num_possible_cpus() || !cpu_online(cpu)) { - kdb_printf("no process for cpu %ld\n", cpu); - return 0; - } - sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); - kdb_parse(buf); - return 0; - } - kdb_printf("btc: cpu status: "); - kdb_parse("cpu\n"); - for_each_online_cpu(cpu) { - void *kdb_tsk = KDB_TSK(cpu); - - /* If a CPU failed to round up we could be here */ - if (!kdb_tsk) { - kdb_printf("WARNING: no task for cpu %ld\n", - cpu); - continue; + kdb_bt_cpu(cpu); + } else { + /* + * Recursive use of kdb_parse, do not use argv after + * this point. + */ + argv = NULL; + kdb_printf("btc: cpu status: "); + kdb_parse("cpu\n"); + for_each_online_cpu(cpu) { + kdb_bt_cpu(cpu); + touch_nmi_watchdog(); } - - sprintf(buf, "btt 0x%px\n", kdb_tsk); - kdb_parse(buf); - touch_nmi_watchdog(); + kdb_set_current_task(save_current_task); } - kdb_set_current_task(save_current_task); return 0; } else { if (argc) { @@ -211,7 +217,7 @@ kdb_bt(int argc, const char **argv) kdb_show_stack(kdb_current_task, (void *)addr); return 0; } else { - return kdb_bt1(kdb_current_task, ~0UL, argcount, 0); + return kdb_bt1(kdb_current_task, ~0UL, false); } } diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 3a5184eb6977..8bcdded5d61f 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -49,14 +49,88 @@ static int kgdb_transition_check(char *buffer) return 0; } -static int kdb_read_get_key(char *buffer, size_t bufsize) +/** + * kdb_handle_escape() - validity check on an accumulated escape sequence. + * @buf: Accumulated escape characters to be examined. Note that buf + * is not a string, it is an array of characters and need not be + * nil terminated. + * @sz: Number of accumulated escape characters. + * + * Return: -1 if the escape sequence is unwanted, 0 if it is incomplete, + * otherwise it returns a mapped key value to pass to the upper layers. + */ +static int kdb_handle_escape(char *buf, size_t sz) +{ + char *lastkey = buf + sz - 1; + + switch (sz) { + case 1: + if (*lastkey == '\e') + return 0; + break; + + case 2: /* \e<something> */ + if (*lastkey == '[') + return 0; + break; + + case 3: + switch (*lastkey) { + case 'A': /* \e[A, up arrow */ + return 16; + case 'B': /* \e[B, down arrow */ + return 14; + case 'C': /* \e[C, right arrow */ + return 6; + case 'D': /* \e[D, left arrow */ + return 2; + case '1': /* \e[<1,3,4>], may be home, del, end */ + case '3': + case '4': + return 0; + } + break; + + case 4: + if (*lastkey == '~') { + switch (buf[2]) { + case '1': /* \e[1~, home */ + return 1; + case '3': /* \e[3~, del */ + return 4; + case '4': /* \e[4~, end */ + return 5; + } + } + break; + } + + return -1; +} + +/** + * kdb_getchar() - Read a single character from a kdb console (or consoles). + * + * Other than polling the various consoles that are currently enabled, + * most of the work done in this function is dealing with escape sequences. + * + * An escape key could be the start of a vt100 control sequence such as \e[D + * (left arrow) or it could be a character in its own right. The standard + * method for detecting the difference is to wait for 2 seconds to see if there + * are any other characters. kdb is complicated by the lack of a timer service + * (interrupts are off), by multiple input sources. Escape sequence processing + * has to be done as states in the polling loop. + * + * Return: The key pressed or a control code derived from an escape sequence. + */ +char kdb_getchar(void) { #define ESCAPE_UDELAY 1000 #define ESCAPE_DELAY (2*1000000/ESCAPE_UDELAY) /* 2 seconds worth of udelays */ - char escape_data[5]; /* longest vt100 escape sequence is 4 bytes */ - char *ped = escape_data; + char buf[4]; /* longest vt100 escape sequence is 4 bytes */ + char *pbuf = buf; int escape_delay = 0; - get_char_func *f, *f_escape = NULL; + get_char_func *f, *f_prev = NULL; int key; for (f = &kdb_poll_funcs[0]; ; ++f) { @@ -65,109 +139,37 @@ static int kdb_read_get_key(char *buffer, size_t bufsize) touch_nmi_watchdog(); f = &kdb_poll_funcs[0]; } - if (escape_delay == 2) { - *ped = '\0'; - ped = escape_data; - --escape_delay; - } - if (escape_delay == 1) { - key = *ped++; - if (!*ped) - --escape_delay; - break; - } + key = (*f)(); if (key == -1) { if (escape_delay) { udelay(ESCAPE_UDELAY); - --escape_delay; + if (--escape_delay == 0) + return '\e'; } continue; } - if (bufsize <= 2) { - if (key == '\r') - key = '\n'; - *buffer++ = key; - *buffer = '\0'; - return -1; - } - if (escape_delay == 0 && key == '\e') { + + /* + * When the first character is received (or we get a change + * input source) we set ourselves up to handle an escape + * sequences (just in case). + */ + if (f_prev != f) { + f_prev = f; + pbuf = buf; escape_delay = ESCAPE_DELAY; - ped = escape_data; - f_escape = f; - } - if (escape_delay) { - *ped++ = key; - if (f_escape != f) { - escape_delay = 2; - continue; - } - if (ped - escape_data == 1) { - /* \e */ - continue; - } else if (ped - escape_data == 2) { - /* \e<something> */ - if (key != '[') - escape_delay = 2; - continue; - } else if (ped - escape_data == 3) { - /* \e[<something> */ - int mapkey = 0; - switch (key) { - case 'A': /* \e[A, up arrow */ - mapkey = 16; - break; - case 'B': /* \e[B, down arrow */ - mapkey = 14; - break; - case 'C': /* \e[C, right arrow */ - mapkey = 6; - break; - case 'D': /* \e[D, left arrow */ - mapkey = 2; - break; - case '1': /* dropthrough */ - case '3': /* dropthrough */ - /* \e[<1,3,4>], may be home, del, end */ - case '4': - mapkey = -1; - break; - } - if (mapkey != -1) { - if (mapkey > 0) { - escape_data[0] = mapkey; - escape_data[1] = '\0'; - } - escape_delay = 2; - } - continue; - } else if (ped - escape_data == 4) { - /* \e[<1,3,4><something> */ - int mapkey = 0; - if (key == '~') { - switch (escape_data[2]) { - case '1': /* \e[1~, home */ - mapkey = 1; - break; - case '3': /* \e[3~, del */ - mapkey = 4; - break; - case '4': /* \e[4~, end */ - mapkey = 5; - break; - } - } - if (mapkey > 0) { - escape_data[0] = mapkey; - escape_data[1] = '\0'; - } - escape_delay = 2; - continue; - } } - break; /* A key to process */ + + *pbuf++ = key; + key = kdb_handle_escape(buf, pbuf - buf); + if (key < 0) /* no escape sequence; return best character */ + return buf[pbuf - buf == 2 ? 1 : 0]; + if (key > 0) + return key; } - return key; + + unreachable(); } /* @@ -188,17 +190,7 @@ static int kdb_read_get_key(char *buffer, size_t bufsize) * function. It is not reentrant - it relies on the fact * that while kdb is running on only one "master debug" cpu. * Remarks: - * - * The buffer size must be >= 2. A buffer size of 2 means that the caller only - * wants a single key. - * - * An escape key could be the start of a vt100 control sequence such as \e[D - * (left arrow) or it could be a character in its own right. The standard - * method for detecting the difference is to wait for 2 seconds to see if there - * are any other characters. kdb is complicated by the lack of a timer service - * (interrupts are off), by multiple input sources and by the need to sometimes - * return after just one key. Escape sequence processing has to be done as - * states in the polling loop. + * The buffer size must be >= 2. */ static char *kdb_read(char *buffer, size_t bufsize) @@ -233,9 +225,7 @@ static char *kdb_read(char *buffer, size_t bufsize) *cp = '\0'; kdb_printf("%s", buffer); poll_again: - key = kdb_read_get_key(buffer, bufsize); - if (key == -1) - return buffer; + key = kdb_getchar(); if (key != 9) tab = 0; switch (key) { @@ -746,7 +736,7 @@ kdb_printit: /* check for having reached the LINES number of printed lines */ if (kdb_nextline >= linecount) { - char buf1[16] = ""; + char ch; /* Watch out for recursion here. Any routine that calls * kdb_printf will come back through here. And kdb_read @@ -781,39 +771,38 @@ kdb_printit: if (logging) printk("%s", moreprompt); - kdb_read(buf1, 2); /* '2' indicates to return - * immediately after getting one key. */ + ch = kdb_getchar(); kdb_nextline = 1; /* Really set output line 1 */ /* empty and reset the buffer: */ kdb_buffer[0] = '\0'; next_avail = kdb_buffer; size_avail = sizeof(kdb_buffer); - if ((buf1[0] == 'q') || (buf1[0] == 'Q')) { + if ((ch == 'q') || (ch == 'Q')) { /* user hit q or Q */ KDB_FLAG_SET(CMD_INTERRUPT); /* command interrupted */ KDB_STATE_CLEAR(PAGER); /* end of command output; back to normal mode */ kdb_grepping_flag = 0; kdb_printf("\n"); - } else if (buf1[0] == ' ') { + } else if (ch == ' ') { kdb_printf("\r"); suspend_grep = 1; /* for this recursion */ - } else if (buf1[0] == '\n') { + } else if (ch == '\n' || ch == '\r') { kdb_nextline = linecount - 1; kdb_printf("\r"); suspend_grep = 1; /* for this recursion */ - } else if (buf1[0] == '/' && !kdb_grepping_flag) { + } else if (ch == '/' && !kdb_grepping_flag) { kdb_printf("\r"); kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN, kdbgetenv("SEARCHPROMPT") ?: "search> "); *strchrnul(kdb_grep_string, '\n') = '\0'; kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH; suspend_grep = 1; /* for this recursion */ - } else if (buf1[0] && buf1[0] != '\n') { - /* user hit something other than enter */ + } else if (ch) { + /* user hit something unexpected */ suspend_grep = 1; /* for this recursion */ - if (buf1[0] != '/') + if (ch != '/') kdb_printf( "\nOnly 'q', 'Q' or '/' are processed at " "more prompt, input ignored\n"); diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 2118d8258b7c..55d052061ef9 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -210,6 +210,7 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig(struct task_struct *p, int sig); extern void kdb_meminfo_proc_show(void); +extern char kdb_getchar(void); extern char *kdb_getstr(char *, size_t, const char *); extern void kdb_gdb_state_pass(char *buf); diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index d47bd40fc0f5..d14cbc83986a 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -178,7 +178,7 @@ bool dma_in_atomic_pool(void *start, size_t size) if (unlikely(!atomic_pool)) return false; - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c74761004ee5..ece7e13f6e4a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1457,7 +1457,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); - if (area->vaddr & ~PAGE_MASK) { + if (IS_ERR_VALUE(area->vaddr)) { ret = area->vaddr; goto fail; } diff --git a/kernel/exit.c b/kernel/exit.c index 0bac4b60d5f3..bcbd59888e67 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1409,7 +1409,7 @@ static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, void __wake_up_parent(struct task_struct *p, struct task_struct *parent) { __wake_up_sync_key(&parent->signal->wait_chldexit, - TASK_INTERRUPTIBLE, 1, p); + TASK_INTERRUPTIBLE, p); } static long do_wait(struct wait_opts *wo) diff --git a/kernel/fork.c b/kernel/fork.c index 0f0bac8318dd..2508a4f238a3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -93,6 +93,7 @@ #include <linux/livepatch.h> #include <linux/thread_info.h> #include <linux/stackleak.h> +#include <linux/kasan.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -223,6 +224,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; + /* Clear the KASAN shadow of the stack. */ + kasan_unpoison_shadow(s->addr, THREAD_SIZE); + /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); @@ -2181,7 +2185,7 @@ static __latent_entropy struct task_struct *copy_process( */ p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boottime_ns(); + p->start_boottime = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 5a0fc0b0403a..e13ca842eb7e 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # SPDX-License-Identifier: GPL-2.0 # This script generates an archive consisting of kernel headers @@ -21,30 +21,38 @@ arch/$SRCARCH/include/ # Uncomment it for debugging. # if [ ! -f /tmp/iter ]; then iter=1; echo 1 > /tmp/iter; # else iter=$(($(cat /tmp/iter) + 1)); echo $iter > /tmp/iter; fi -# find $src_file_list -name "*.h" | xargs ls -l > /tmp/src-ls-$iter -# find $obj_file_list -name "*.h" | xargs ls -l > /tmp/obj-ls-$iter +# find $all_dirs -name "*.h" | xargs ls -l > /tmp/ls-$iter + +all_dirs= +if [ "$building_out_of_srctree" ]; then + for d in $dir_list; do + all_dirs="$all_dirs $srctree/$d" + done +fi +all_dirs="$all_dirs $dir_list" # include/generated/compile.h is ignored because it is touched even when none -# of the source files changed. This causes pointless regeneration, so let us -# ignore them for md5 calculation. -pushd $srctree > /dev/null -src_files_md5="$(find $dir_list -name "*.h" | - grep -v "include/generated/compile.h" | - grep -v "include/generated/autoconf.h" | - xargs ls -l | md5sum | cut -d ' ' -f1)" -popd > /dev/null -obj_files_md5="$(find $dir_list -name "*.h" | - grep -v "include/generated/compile.h" | - grep -v "include/generated/autoconf.h" | +# of the source files changed. +# +# When Kconfig regenerates include/generated/autoconf.h, its timestamp is +# updated, but the contents might be still the same. When any CONFIG option is +# changed, Kconfig touches the corresponding timestamp file include/config/*.h. +# Hence, the md5sum detects the configuration change anyway. We do not need to +# check include/generated/autoconf.h explicitly. +# +# Ignore them for md5 calculation to avoid pointless regeneration. +headers_md5="$(find $all_dirs -name "*.h" | + grep -v "include/generated/compile.h" | + grep -v "include/generated/autoconf.h" | xargs ls -l | md5sum | cut -d ' ' -f1)" + # Any changes to this script will also cause a rebuild of the archive. this_file_md5="$(ls -l $sfile | md5sum | cut -d ' ' -f1)" if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi if [ -f kernel/kheaders.md5 ] && - [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] && - [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] && - [ "$(cat kernel/kheaders.md5|head -3|tail -1)" == "$this_file_md5" ] && - [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then + [ "$(head -n 1 kernel/kheaders.md5)" = "$headers_md5" ] && + [ "$(head -n 2 kernel/kheaders.md5 | tail -n 1)" = "$this_file_md5" ] && + [ "$(tail -n 1 kernel/kheaders.md5)" = "$tarfile_md5" ]; then exit fi @@ -55,14 +63,17 @@ fi rm -rf $cpio_dir mkdir $cpio_dir -pushd $srctree > /dev/null -for f in $dir_list; - do find "$f" -name "*.h"; -done | cpio --quiet -pd $cpio_dir -popd > /dev/null +if [ "$building_out_of_srctree" ]; then + ( + cd $srctree + for f in $dir_list + do find "$f" -name "*.h"; + done | cpio --quiet -pd $cpio_dir + ) +fi -# The second CPIO can complain if files already exist which can -# happen with out of tree builds. Just silence CPIO for now. +# The second CPIO can complain if files already exist which can happen with out +# of tree builds having stale headers in srctree. Just silence CPIO for now. for f in $dir_list; do find "$f" -name "*.h"; done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1 @@ -79,8 +90,7 @@ find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \ --owner=0 --group=0 --numeric-owner --no-recursion \ -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null -echo "$src_files_md5" > kernel/kheaders.md5 -echo "$obj_files_md5" >> kernel/kheaders.md5 +echo $headers_md5 > kernel/kheaders.md5 echo "$this_file_md5" >> kernel/kheaders.md5 echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5 diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b76703b2c0af..b3fa2d87d2f3 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1298,6 +1298,50 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_mask_irq); #endif /* CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS */ /** + * irq_chip_set_parent_state - set the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: State to be restored (one of IRQCHIP_STATE_*) + * @val: Value corresponding to @which + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_set_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool val) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_set_irqchip_state) + return 0; + + return data->chip->irq_set_irqchip_state(data, which, val); +} +EXPORT_SYMBOL_GPL(irq_chip_set_parent_state); + +/** + * irq_chip_get_parent_state - get the state of a parent interrupt. + * + * @data: Pointer to interrupt specific data + * @which: one of IRQCHIP_STATE_* the caller wants to know + * @state: a pointer to a boolean where the state is to be stored + * + * Conditional success, if the underlying irqchip does not implement it. + */ +int irq_chip_get_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state) +{ + data = data->parent_data; + + if (!data || !data->chip->irq_get_irqchip_state) + return 0; + + return data->chip->irq_get_irqchip_state(data, which, state); +} +EXPORT_SYMBOL_GPL(irq_chip_get_parent_state); + +/** * irq_chip_enable_parent - Enable the parent interrupt (defaults to unmask if * NULL) * @data: Pointer to interrupt specific data diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9be995fc3c5a..5b8fdd659e54 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -750,7 +750,7 @@ void irq_free_descs(unsigned int from, unsigned int cnt) EXPORT_SYMBOL_GPL(irq_free_descs); /** - * irq_alloc_descs - allocate and initialize a range of irq descriptors + * __irq_alloc_descs - allocate and initialize a range of irq descriptors * @irq: Allocate for specific irq number if irq >= 0 * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. diff --git a/kernel/irq_work.c b/kernel/irq_work.c index d42acaf81886..828cc30774bc 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -29,24 +29,16 @@ static DEFINE_PER_CPU(struct llist_head, lazy_list); */ static bool irq_work_claim(struct irq_work *work) { - unsigned long flags, oflags, nflags; + int oflags; + oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags); /* - * Start with our best wish as a premise but only trust any - * flag value after cmpxchg() result. + * If the work is already pending, no need to raise the IPI. + * The pairing atomic_fetch_andnot() in irq_work_run() makes sure + * everything we did before is visible. */ - flags = work->flags & ~IRQ_WORK_PENDING; - for (;;) { - nflags = flags | IRQ_WORK_CLAIMED; - oflags = cmpxchg(&work->flags, flags, nflags); - if (oflags == flags) - break; - if (oflags & IRQ_WORK_PENDING) - return false; - flags = oflags; - cpu_relax(); - } - + if (oflags & IRQ_WORK_PENDING) + return false; return true; } @@ -61,7 +53,7 @@ void __weak arch_irq_work_raise(void) static void __irq_work_queue_local(struct irq_work *work) { /* If the work is "lazy", handle it from next tick if any */ - if (work->flags & IRQ_WORK_LAZY) { + if (atomic_read(&work->flags) & IRQ_WORK_LAZY) { if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && tick_nohz_tick_stopped()) arch_irq_work_raise(); @@ -143,7 +135,6 @@ static void irq_work_run_list(struct llist_head *list) { struct irq_work *work, *tmp; struct llist_node *llnode; - unsigned long flags; BUG_ON(!irqs_disabled()); @@ -152,6 +143,7 @@ static void irq_work_run_list(struct llist_head *list) llnode = llist_del_all(list); llist_for_each_entry_safe(work, tmp, llnode, llnode) { + int flags; /* * Clear the PENDING bit, after this point the @work * can be re-used. @@ -159,15 +151,15 @@ static void irq_work_run_list(struct llist_head *list) * to claim that work don't rely on us to handle their data * while we are in the middle of the func. */ - flags = work->flags & ~IRQ_WORK_PENDING; - xchg(&work->flags, flags); + flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags); work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); + flags &= ~IRQ_WORK_PENDING; + (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } @@ -199,7 +191,7 @@ void irq_work_sync(struct irq_work *work) { lockdep_assert_irqs_enabled(); - while (work->flags & IRQ_WORK_BUSY) + while (atomic_read(&work->flags) & IRQ_WORK_BUSY) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); diff --git a/kernel/kcov.c b/kernel/kcov.c index 2ee38727844a..f50354202dbe 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/hashtable.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/preempt.h> @@ -21,8 +22,11 @@ #include <linux/uaccess.h> #include <linux/kcov.h> #include <linux/refcount.h> +#include <linux/log2.h> #include <asm/setup.h> +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + /* Number of 64-bit words written per one comparison: */ #define KCOV_WORDS_PER_CMP 4 @@ -44,19 +48,100 @@ struct kcov { * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -73,7 +158,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * in_interrupt() returns false (e.g. preempt_schedule_irq()). * READ_ONCE()/barrier() effectively provides load-acquire wrt * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). + * kcov_start(). */ barrier(); return mode == needed_mode; @@ -227,6 +312,78 @@ void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) EXPORT_SYMBOL(__sanitizer_cov_trace_switch); #endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) +{ + WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); + barrier(); + t->kcov_size = 0; + t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); + t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). */ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + static void kcov_get(struct kcov *kcov) { refcount_inc(&kcov->refcount); @@ -235,20 +392,12 @@ static void kcov_get(struct kcov *kcov) static void kcov_put(struct kcov *kcov) { if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); vfree(kcov->area); kfree(kcov); } } -void kcov_task_init(struct task_struct *t) -{ - WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); - barrier(); - t->kcov_size = 0; - t->kcov_area = NULL; - t->kcov = NULL; -} - void kcov_task_exit(struct task_struct *t) { struct kcov *kcov; @@ -256,15 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -313,6 +483,7 @@ static int kcov_open(struct inode *inode, struct file *filep) if (!kcov) return -ENOMEM; kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -325,6 +496,20 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + /* * Fault in a lazily-faulted vmalloc area before it can be used by * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the @@ -340,14 +525,35 @@ static void kcov_fault_in_area(struct kcov *kcov) READ_ONCE(area[offset]); } +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? + common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -366,6 +572,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -378,29 +585,20 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - if (arg == KCOV_TRACE_PC) - kcov->mode = KCOV_MODE_TRACE_PC; - else if (arg == KCOV_TRACE_CMP) -#ifdef CONFIG_KCOV_ENABLE_COMPARISONS - kcov->mode = KCOV_MODE_TRACE_CMP; -#else - return -ENOTSUPP; -#endif - else - return -EINVAL; + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; kcov_fault_in_area(kcov); - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in check_kcov_mode(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. */ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -408,11 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; - kcov->mode = KCOV_MODE_INIT; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -422,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = struct_size(remote_arg, handles, + remote_num_handles); + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -438,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. + * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system calls handler when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note, that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn + * kernel threads, the common handle must be retrived via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc() + * In turns kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size. */ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 79f252af7dee..a2df93948665 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -1304,7 +1304,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, if (kernel_map) { phdr->p_type = PT_LOAD; phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_vaddr = (Elf64_Addr)_text; + phdr->p_vaddr = (unsigned long) _text; phdr->p_filesz = phdr->p_memsz = _end - _text; phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); ehdr->e_phnum++; @@ -1321,7 +1321,7 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map, phdr->p_offset = mstart; phdr->p_paddr = mstart; - phdr->p_vaddr = (unsigned long long) __va(mstart); + phdr->p_vaddr = (unsigned long) __va(mstart); phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; phdr->p_align = 0; ehdr->e_phnum++; diff --git a/kernel/module.c b/kernel/module.c index 052a40212b8e..3a486f826224 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1033,6 +1033,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); + /* someone could wait for the module in add_unformed_module() */ + wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); @@ -1400,7 +1402,7 @@ static int verify_namespace_is_imported(const struct load_info *info, char *imported_namespace; namespace = kernel_symbol_namespace(sym); - if (namespace) { + if (namespace && namespace[0]) { imported_namespace = get_modinfo(info, "import_ns"); while (imported_namespace) { if (strcmp(namespace, imported_namespace) == 0) diff --git a/kernel/notifier.c b/kernel/notifier.c index d9f5081d578d..63d7501ac638 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -23,22 +23,10 @@ static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n) { while ((*nl) != NULL) { - WARN_ONCE(((*nl) == n), "double register detected"); - if (n->priority > (*nl)->priority) - break; - nl = &((*nl)->next); - } - n->next = *nl; - rcu_assign_pointer(*nl, n); - return 0; -} - -static int notifier_chain_cond_register(struct notifier_block **nl, - struct notifier_block *n) -{ - while ((*nl) != NULL) { - if ((*nl) == n) + if (unlikely((*nl) == n)) { + WARN(1, "double register detected"); return 0; + } if (n->priority > (*nl)->priority) break; nl = &((*nl)->next); @@ -233,29 +221,6 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); /** - * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain - * @nh: Pointer to head of the blocking notifier chain - * @n: New entry in notifier chain - * - * Adds a notifier to a blocking notifier chain, only if not already - * present in the chain. - * Must be called in process context. - * - * Currently always returns zero. - */ -int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, - struct notifier_block *n) -{ - int ret; - - down_write(&nh->rwsem); - ret = notifier_chain_cond_register(&nh->head, n); - up_write(&nh->rwsem); - return ret; -} -EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); - -/** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: Entry to remove from notifier chain diff --git a/kernel/power/power.h b/kernel/power/power.h index 44bee462ff57..7cdc64dc2373 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -179,7 +179,7 @@ extern void swsusp_close(fmode_t); extern int swsusp_unmark(void); #endif -struct timeval; +struct __kernel_old_timeval; /* kernel/power/swsusp.c */ extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index a45cba7df0ae..83edf8698118 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -714,8 +714,10 @@ s32 freq_qos_read_value(struct freq_constraints *qos, * @req: Constraint request to apply. * @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. + * + * This is only meant to be called from inside pm_qos, not drivers. */ -static int freq_qos_apply(struct freq_qos_request *req, +int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c8be5a0f5259..1ef6f75d92f1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2961,7 +2961,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, + .flags = ATOMIC_INIT(IRQ_WORK_LAZY), }; void wake_up_klogd(void) diff --git a/kernel/profile.c b/kernel/profile.c index af7c94bf5fa1..4b144b02ca5d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -336,7 +336,7 @@ static int profile_dead_cpu(unsigned int cpu) struct page *page; int i; - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_clear_cpu(cpu, prof_cpu_mask); for (i = 0; i < 2; i++) { @@ -373,7 +373,7 @@ static int profile_prepare_cpu(unsigned int cpu) static int profile_online_cpu(unsigned int cpu) { - if (prof_cpu_mask != NULL) + if (cpumask_available(prof_cpu_mask)) cpumask_set_cpu(cpu, prof_cpu_mask); return 0; @@ -403,7 +403,7 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && prof_cpu_mask != NULL && + if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 86800b4d5453..322ca8860f54 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -915,7 +915,7 @@ static int __init sugov_register(void) { return cpufreq_register_governor(&schedutil_gov); } -fs_initcall(sugov_register); +core_initcall(sugov_register); #ifdef CONFIG_ENERGY_MODEL extern bool sched_energy_update; diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index c1e566a114ca..ba059fbfc53a 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark); * __wake_up_sync_key - wake up threads blocked on a waitqueue. * @wq_head: the waitqueue * @mode: which threads - * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: opaque value to be passed to wakeup targets * * The sync wakeup differs that the waker knows that it will schedule @@ -183,26 +182,44 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark); * accessing the task state. */ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, - int nr_exclusive, void *key) + void *key) { - int wake_flags = 1; /* XXX WF_SYNC */ - if (unlikely(!wq_head)) return; - if (unlikely(nr_exclusive != 1)) - wake_flags = 0; - - __wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key); + __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key); } EXPORT_SYMBOL_GPL(__wake_up_sync_key); +/** + * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue. + * @wq_head: the waitqueue + * @mode: which threads + * @key: opaque value to be passed to wakeup targets + * + * The sync wakeup differs in that the waker knows that it will schedule + * away soon, so while the target thread will be woken up, it will not + * be migrated to another CPU - ie. the two threads are 'synchronized' + * with each other. This can prevent needless bouncing between CPUs. + * + * On UP it can prevent extra preemption. + * + * If this function wakes up a task, it executes a full memory barrier before + * accessing the task state. + */ +void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, + unsigned int mode, void *key) +{ + __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL); +} +EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key); + /* * __wake_up_sync - see __wake_up_sync_key() */ -void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive) +void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode) { - __wake_up_sync_key(wq_head, mode, nr_exclusive, NULL); + __wake_up_sync_key(wq_head, mode, NULL); } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index dba52a7db5e8..12d2227e5786 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -75,6 +75,7 @@ struct seccomp_knotif { /* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */ int error; long val; + u32 flags; /* Signals when this has entered SECCOMP_NOTIFY_REPLIED */ struct completion ready; @@ -732,11 +733,12 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) return filter->notif->next_id++; } -static void seccomp_do_user_notification(int this_syscall, - struct seccomp_filter *match, - const struct seccomp_data *sd) +static int seccomp_do_user_notification(int this_syscall, + struct seccomp_filter *match, + const struct seccomp_data *sd) { int err; + u32 flags = 0; long ret = 0; struct seccomp_knotif n = {}; @@ -764,6 +766,7 @@ static void seccomp_do_user_notification(int this_syscall, if (err == 0) { ret = n.val; err = n.error; + flags = n.flags; } /* @@ -780,8 +783,14 @@ static void seccomp_do_user_notification(int this_syscall, list_del(&n.list); out: mutex_unlock(&match->notify_lock); + + /* Userspace requests to continue the syscall. */ + if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) + return 0; + syscall_set_return_value(current, task_pt_regs(current), err, ret); + return -1; } static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, @@ -867,8 +876,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; case SECCOMP_RET_USER_NOTIF: - seccomp_do_user_notification(this_syscall, match, sd); - goto skip; + if (seccomp_do_user_notification(this_syscall, match, sd)) + goto skip; + + return 0; case SECCOMP_RET_LOG: seccomp_log(this_syscall, 0, action, true); @@ -1087,7 +1098,11 @@ static long seccomp_notify_send(struct seccomp_filter *filter, if (copy_from_user(&resp, buf, sizeof(resp))) return -EFAULT; - if (resp.flags) + if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE) + return -EINVAL; + + if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) && + (resp.error || resp.val)) return -EINVAL; ret = mutex_lock_interruptible(&filter->notify_lock); @@ -1116,6 +1131,7 @@ static long seccomp_notify_send(struct seccomp_filter *filter, knotif->state = SECCOMP_NOTIFY_REPLIED; knotif->error = resp.error; knotif->val = resp.val; + knotif->flags = resp.flags; complete(&knotif->ready); out: mutex_unlock(&filter->notify_lock); diff --git a/kernel/sys.c b/kernel/sys.c index a611d1d58c7d..a9331f101883 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1279,11 +1279,13 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { - struct oldold_utsname tmp = {}; + struct oldold_utsname tmp; if (!name) return -EFAULT; + memset(&tmp, 0, sizeof(tmp)); + down_read(&uts_sem); memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); @@ -1763,8 +1765,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); out: - r->ru_utime = ns_to_timeval(utime); - r->ru_stime = ns_to_timeval(stime); + r->ru_utime = ns_to_kernel_old_timeval(utime); + r->ru_stime = ns_to_kernel_old_timeval(stime); if (who != RUSAGE_CHILDREN) { struct mm_struct *mm = get_task_mm(p); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 34b76895b81e..3b69a560a7ac 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -410,6 +410,29 @@ COND_SYSCALL(send); COND_SYSCALL(bdflush); COND_SYSCALL(uselib); +/* optional: time32 */ +COND_SYSCALL(time32); +COND_SYSCALL(stime32); +COND_SYSCALL(utime32); +COND_SYSCALL(adjtimex_time32); +COND_SYSCALL(sched_rr_get_interval_time32); +COND_SYSCALL(nanosleep_time32); +COND_SYSCALL(rt_sigtimedwait_time32); +COND_SYSCALL_COMPAT(rt_sigtimedwait_time32); +COND_SYSCALL(timer_settime32); +COND_SYSCALL(timer_gettime32); +COND_SYSCALL(clock_settime32); +COND_SYSCALL(clock_gettime32); +COND_SYSCALL(clock_getres_time32); +COND_SYSCALL(clock_nanosleep_time32); +COND_SYSCALL(utimes_time32); +COND_SYSCALL(futimesat_time32); +COND_SYSCALL(pselect6_time32); +COND_SYSCALL_COMPAT(pselect6_time32); +COND_SYSCALL(ppoll_time32); +COND_SYSCALL_COMPAT(ppoll_time32); +COND_SYSCALL(utimensat_time32); +COND_SYSCALL(clock_adjtime32); /* * The syscalls below are not found in include/uapi/asm-generic/unistd.h diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b6f2f35d0bcf..70665934d53e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1466,7 +1466,7 @@ static struct ctl_table vm_table[] = { .procname = "drop_caches", .data = &sysctl_drop_caches, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, .extra2 = &four, diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 73c132095a7b..7d550cc76a3b 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -18,1317 +18,12 @@ #include <linux/slab.h> #include <linux/compat.h> -#ifdef CONFIG_SYSCTL_SYSCALL - -struct bin_table; -typedef ssize_t bin_convert_t(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen); - -static bin_convert_t bin_dir; -static bin_convert_t bin_string; -static bin_convert_t bin_intvec; -static bin_convert_t bin_ulongvec; -static bin_convert_t bin_uuid; -static bin_convert_t bin_dn_node_address; - -#define CTL_DIR bin_dir -#define CTL_STR bin_string -#define CTL_INT bin_intvec -#define CTL_ULONG bin_ulongvec -#define CTL_UUID bin_uuid -#define CTL_DNADR bin_dn_node_address - -#define BUFSZ 256 - -struct bin_table { - bin_convert_t *convert; - int ctl_name; - const char *procname; - const struct bin_table *child; -}; - -static const struct bin_table bin_random_table[] = { - { CTL_INT, RANDOM_POOLSIZE, "poolsize" }, - { CTL_INT, RANDOM_ENTROPY_COUNT, "entropy_avail" }, - { CTL_INT, RANDOM_READ_THRESH, "read_wakeup_threshold" }, - { CTL_INT, RANDOM_WRITE_THRESH, "write_wakeup_threshold" }, - { CTL_UUID, RANDOM_BOOT_ID, "boot_id" }, - { CTL_UUID, RANDOM_UUID, "uuid" }, - {} -}; - -static const struct bin_table bin_pty_table[] = { - { CTL_INT, PTY_MAX, "max" }, - { CTL_INT, PTY_NR, "nr" }, - {} -}; - -static const struct bin_table bin_kern_table[] = { - { CTL_STR, KERN_OSTYPE, "ostype" }, - { CTL_STR, KERN_OSRELEASE, "osrelease" }, - /* KERN_OSREV not used */ - { CTL_STR, KERN_VERSION, "version" }, - /* KERN_SECUREMASK not used */ - /* KERN_PROF not used */ - { CTL_STR, KERN_NODENAME, "hostname" }, - { CTL_STR, KERN_DOMAINNAME, "domainname" }, - - { CTL_INT, KERN_PANIC, "panic" }, - { CTL_INT, KERN_REALROOTDEV, "real-root-dev" }, - - { CTL_STR, KERN_SPARC_REBOOT, "reboot-cmd" }, - { CTL_INT, KERN_CTLALTDEL, "ctrl-alt-del" }, - { CTL_INT, KERN_PRINTK, "printk" }, - - /* KERN_NAMETRANS not used */ - /* KERN_PPC_HTABRECLAIM not used */ - /* KERN_PPC_ZEROPAGED not used */ - { CTL_INT, KERN_PPC_POWERSAVE_NAP, "powersave-nap" }, - - { CTL_STR, KERN_MODPROBE, "modprobe" }, - { CTL_INT, KERN_SG_BIG_BUFF, "sg-big-buff" }, - { CTL_INT, KERN_ACCT, "acct" }, - /* KERN_PPC_L2CR "l2cr" no longer used */ - - /* KERN_RTSIGNR not used */ - /* KERN_RTSIGMAX not used */ - - { CTL_ULONG, KERN_SHMMAX, "shmmax" }, - { CTL_INT, KERN_MSGMAX, "msgmax" }, - { CTL_INT, KERN_MSGMNB, "msgmnb" }, - /* KERN_MSGPOOL not used*/ - { CTL_INT, KERN_SYSRQ, "sysrq" }, - { CTL_INT, KERN_MAX_THREADS, "threads-max" }, - { CTL_DIR, KERN_RANDOM, "random", bin_random_table }, - { CTL_ULONG, KERN_SHMALL, "shmall" }, - { CTL_INT, KERN_MSGMNI, "msgmni" }, - { CTL_INT, KERN_SEM, "sem" }, - { CTL_INT, KERN_SPARC_STOP_A, "stop-a" }, - { CTL_INT, KERN_SHMMNI, "shmmni" }, - - { CTL_INT, KERN_OVERFLOWUID, "overflowuid" }, - { CTL_INT, KERN_OVERFLOWGID, "overflowgid" }, - - { CTL_STR, KERN_HOTPLUG, "hotplug", }, - { CTL_INT, KERN_IEEE_EMULATION_WARNINGS, "ieee_emulation_warnings" }, - - { CTL_INT, KERN_S390_USER_DEBUG_LOGGING, "userprocess_debug" }, - { CTL_INT, KERN_CORE_USES_PID, "core_uses_pid" }, - /* KERN_TAINTED "tainted" no longer used */ - { CTL_INT, KERN_CADPID, "cad_pid" }, - { CTL_INT, KERN_PIDMAX, "pid_max" }, - { CTL_STR, KERN_CORE_PATTERN, "core_pattern" }, - { CTL_INT, KERN_PANIC_ON_OOPS, "panic_on_oops" }, - { CTL_INT, KERN_HPPA_PWRSW, "soft-power" }, - { CTL_INT, KERN_HPPA_UNALIGNED, "unaligned-trap" }, - - { CTL_INT, KERN_PRINTK_RATELIMIT, "printk_ratelimit" }, - { CTL_INT, KERN_PRINTK_RATELIMIT_BURST, "printk_ratelimit_burst" }, - - { CTL_DIR, KERN_PTY, "pty", bin_pty_table }, - { CTL_INT, KERN_NGROUPS_MAX, "ngroups_max" }, - { CTL_INT, KERN_SPARC_SCONS_PWROFF, "scons-poweroff" }, - /* KERN_HZ_TIMER "hz_timer" no longer used */ - { CTL_INT, KERN_UNKNOWN_NMI_PANIC, "unknown_nmi_panic" }, - { CTL_INT, KERN_BOOTLOADER_TYPE, "bootloader_type" }, - { CTL_INT, KERN_RANDOMIZE, "randomize_va_space" }, - - { CTL_INT, KERN_SPIN_RETRY, "spin_retry" }, - /* KERN_ACPI_VIDEO_FLAGS "acpi_video_flags" no longer used */ - { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, - { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, - { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, - { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, - { CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" }, - { CTL_ULONG, KERN_PANIC_PRINT, "panic_print" }, - {} -}; - -static const struct bin_table bin_vm_table[] = { - { CTL_INT, VM_OVERCOMMIT_MEMORY, "overcommit_memory" }, - { CTL_INT, VM_PAGE_CLUSTER, "page-cluster" }, - { CTL_INT, VM_DIRTY_BACKGROUND, "dirty_background_ratio" }, - { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, - /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ - /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ - /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */ - { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, - /* VM_PAGEBUF unused */ - /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ - { CTL_INT, VM_SWAPPINESS, "swappiness" }, - { CTL_INT, VM_LOWMEM_RESERVE_RATIO, "lowmem_reserve_ratio" }, - { CTL_INT, VM_MIN_FREE_KBYTES, "min_free_kbytes" }, - { CTL_INT, VM_MAX_MAP_COUNT, "max_map_count" }, - { CTL_INT, VM_LAPTOP_MODE, "laptop_mode" }, - { CTL_INT, VM_BLOCK_DUMP, "block_dump" }, - { CTL_INT, VM_HUGETLB_GROUP, "hugetlb_shm_group" }, - { CTL_INT, VM_VFS_CACHE_PRESSURE, "vfs_cache_pressure" }, - { CTL_INT, VM_LEGACY_VA_LAYOUT, "legacy_va_layout" }, - /* VM_SWAP_TOKEN_TIMEOUT unused */ - { CTL_INT, VM_DROP_PAGECACHE, "drop_caches" }, - { CTL_INT, VM_PERCPU_PAGELIST_FRACTION, "percpu_pagelist_fraction" }, - { CTL_INT, VM_ZONE_RECLAIM_MODE, "zone_reclaim_mode" }, - { CTL_INT, VM_MIN_UNMAPPED, "min_unmapped_ratio" }, - { CTL_INT, VM_PANIC_ON_OOM, "panic_on_oom" }, - { CTL_INT, VM_VDSO_ENABLED, "vdso_enabled" }, - { CTL_INT, VM_MIN_SLAB, "min_slab_ratio" }, - - {} -}; - -static const struct bin_table bin_net_core_table[] = { - { CTL_INT, NET_CORE_WMEM_MAX, "wmem_max" }, - { CTL_INT, NET_CORE_RMEM_MAX, "rmem_max" }, - { CTL_INT, NET_CORE_WMEM_DEFAULT, "wmem_default" }, - { CTL_INT, NET_CORE_RMEM_DEFAULT, "rmem_default" }, - /* NET_CORE_DESTROY_DELAY unused */ - { CTL_INT, NET_CORE_MAX_BACKLOG, "netdev_max_backlog" }, - /* NET_CORE_FASTROUTE unused */ - { CTL_INT, NET_CORE_MSG_COST, "message_cost" }, - { CTL_INT, NET_CORE_MSG_BURST, "message_burst" }, - { CTL_INT, NET_CORE_OPTMEM_MAX, "optmem_max" }, - /* NET_CORE_HOT_LIST_LENGTH unused */ - /* NET_CORE_DIVERT_VERSION unused */ - /* NET_CORE_NO_CONG_THRESH unused */ - /* NET_CORE_NO_CONG unused */ - /* NET_CORE_LO_CONG unused */ - /* NET_CORE_MOD_CONG unused */ - { CTL_INT, NET_CORE_DEV_WEIGHT, "dev_weight" }, - { CTL_INT, NET_CORE_SOMAXCONN, "somaxconn" }, - { CTL_INT, NET_CORE_BUDGET, "netdev_budget" }, - { CTL_INT, NET_CORE_AEVENT_ETIME, "xfrm_aevent_etime" }, - { CTL_INT, NET_CORE_AEVENT_RSEQTH, "xfrm_aevent_rseqth" }, - { CTL_INT, NET_CORE_WARNINGS, "warnings" }, - {}, -}; - -static const struct bin_table bin_net_unix_table[] = { - /* NET_UNIX_DESTROY_DELAY unused */ - /* NET_UNIX_DELETE_DELAY unused */ - { CTL_INT, NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen" }, - {} -}; - -static const struct bin_table bin_net_ipv4_route_table[] = { - { CTL_INT, NET_IPV4_ROUTE_FLUSH, "flush" }, - /* NET_IPV4_ROUTE_MIN_DELAY "min_delay" no longer used */ - /* NET_IPV4_ROUTE_MAX_DELAY "max_delay" no longer used */ - { CTL_INT, NET_IPV4_ROUTE_GC_THRESH, "gc_thresh" }, - { CTL_INT, NET_IPV4_ROUTE_MAX_SIZE, "max_size" }, - { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" }, - { CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" }, - { CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" }, - /* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */ - { CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" }, - { CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" }, - { CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" }, - { CTL_INT, NET_IPV4_ROUTE_ERROR_COST, "error_cost" }, - { CTL_INT, NET_IPV4_ROUTE_ERROR_BURST, "error_burst" }, - { CTL_INT, NET_IPV4_ROUTE_GC_ELASTICITY, "gc_elasticity" }, - { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, - { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, - { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, - {} -}; - -static const struct bin_table bin_net_ipv4_conf_vars_table[] = { - { CTL_INT, NET_IPV4_CONF_FORWARDING, "forwarding" }, - { CTL_INT, NET_IPV4_CONF_MC_FORWARDING, "mc_forwarding" }, - - { CTL_INT, NET_IPV4_CONF_ACCEPT_REDIRECTS, "accept_redirects" }, - { CTL_INT, NET_IPV4_CONF_SECURE_REDIRECTS, "secure_redirects" }, - { CTL_INT, NET_IPV4_CONF_SEND_REDIRECTS, "send_redirects" }, - { CTL_INT, NET_IPV4_CONF_SHARED_MEDIA, "shared_media" }, - { CTL_INT, NET_IPV4_CONF_RP_FILTER, "rp_filter" }, - { CTL_INT, NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, - { CTL_INT, NET_IPV4_CONF_PROXY_ARP, "proxy_arp" }, - { CTL_INT, NET_IPV4_CONF_MEDIUM_ID, "medium_id" }, - { CTL_INT, NET_IPV4_CONF_BOOTP_RELAY, "bootp_relay" }, - { CTL_INT, NET_IPV4_CONF_LOG_MARTIANS, "log_martians" }, - { CTL_INT, NET_IPV4_CONF_TAG, "tag" }, - { CTL_INT, NET_IPV4_CONF_ARPFILTER, "arp_filter" }, - { CTL_INT, NET_IPV4_CONF_ARP_ANNOUNCE, "arp_announce" }, - { CTL_INT, NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" }, - { CTL_INT, NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" }, - { CTL_INT, NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" }, - - { CTL_INT, NET_IPV4_CONF_NOXFRM, "disable_xfrm" }, - { CTL_INT, NET_IPV4_CONF_NOPOLICY, "disable_policy" }, - { CTL_INT, NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version" }, - { CTL_INT, NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" }, - {} -}; - -static const struct bin_table bin_net_ipv4_conf_table[] = { - { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv4_conf_vars_table }, - { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv4_conf_vars_table }, - { CTL_DIR, 0, NULL, bin_net_ipv4_conf_vars_table }, - {} -}; - -static const struct bin_table bin_net_neigh_vars_table[] = { - { CTL_INT, NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" }, - { CTL_INT, NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" }, - { CTL_INT, NET_NEIGH_APP_SOLICIT, "app_solicit" }, - /* NET_NEIGH_RETRANS_TIME "retrans_time" no longer used */ - { CTL_INT, NET_NEIGH_REACHABLE_TIME, "base_reachable_time" }, - { CTL_INT, NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time" }, - { CTL_INT, NET_NEIGH_GC_STALE_TIME, "gc_stale_time" }, - { CTL_INT, NET_NEIGH_UNRES_QLEN, "unres_qlen" }, - { CTL_INT, NET_NEIGH_PROXY_QLEN, "proxy_qlen" }, - /* NET_NEIGH_ANYCAST_DELAY "anycast_delay" no longer used */ - /* NET_NEIGH_PROXY_DELAY "proxy_delay" no longer used */ - /* NET_NEIGH_LOCKTIME "locktime" no longer used */ - { CTL_INT, NET_NEIGH_GC_INTERVAL, "gc_interval" }, - { CTL_INT, NET_NEIGH_GC_THRESH1, "gc_thresh1" }, - { CTL_INT, NET_NEIGH_GC_THRESH2, "gc_thresh2" }, - { CTL_INT, NET_NEIGH_GC_THRESH3, "gc_thresh3" }, - { CTL_INT, NET_NEIGH_RETRANS_TIME_MS, "retrans_time_ms" }, - { CTL_INT, NET_NEIGH_REACHABLE_TIME_MS, "base_reachable_time_ms" }, - {} -}; - -static const struct bin_table bin_net_neigh_table[] = { - { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_neigh_vars_table }, - { CTL_DIR, 0, NULL, bin_net_neigh_vars_table }, - {} -}; - -static const struct bin_table bin_net_ipv4_netfilter_table[] = { - { CTL_INT, NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max" }, - - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "ip_conntrack_tcp_timeout_syn_sent" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "ip_conntrack_tcp_timeout_syn_recv" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "ip_conntrack_tcp_timeout_established" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "ip_conntrack_tcp_timeout_fin_wait" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "ip_conntrack_tcp_timeout_close_wait" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "ip_conntrack_tcp_timeout_last_ack" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "ip_conntrack_tcp_timeout_time_wait" no longer used */ - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "ip_conntrack_tcp_timeout_close" no longer used */ - - /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT "ip_conntrack_udp_timeout" no longer used */ - /* NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM "ip_conntrack_udp_timeout_stream" no longer used */ - /* NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT "ip_conntrack_icmp_timeout" no longer used */ - /* NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT "ip_conntrack_generic_timeout" no longer used */ - - { CTL_INT, NET_IPV4_NF_CONNTRACK_BUCKETS, "ip_conntrack_buckets" }, - { CTL_INT, NET_IPV4_NF_CONNTRACK_LOG_INVALID, "ip_conntrack_log_invalid" }, - /* NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "ip_conntrack_tcp_timeout_max_retrans" no longer used */ - { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_LOOSE, "ip_conntrack_tcp_loose" }, - { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, "ip_conntrack_tcp_be_liberal" }, - { CTL_INT, NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, "ip_conntrack_tcp_max_retrans" }, - - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "ip_conntrack_sctp_timeout_closed" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "ip_conntrack_sctp_timeout_cookie_wait" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "ip_conntrack_sctp_timeout_cookie_echoed" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "ip_conntrack_sctp_timeout_established" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "ip_conntrack_sctp_timeout_shutdown_sent" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "ip_conntrack_sctp_timeout_shutdown_recd" no longer used */ - /* NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "ip_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */ - - { CTL_INT, NET_IPV4_NF_CONNTRACK_COUNT, "ip_conntrack_count" }, - { CTL_INT, NET_IPV4_NF_CONNTRACK_CHECKSUM, "ip_conntrack_checksum" }, - {} -}; - -static const struct bin_table bin_net_ipv4_table[] = { - {CTL_INT, NET_IPV4_FORWARD, "ip_forward" }, - - { CTL_DIR, NET_IPV4_CONF, "conf", bin_net_ipv4_conf_table }, - { CTL_DIR, NET_IPV4_NEIGH, "neigh", bin_net_neigh_table }, - { CTL_DIR, NET_IPV4_ROUTE, "route", bin_net_ipv4_route_table }, - /* NET_IPV4_FIB_HASH unused */ - { CTL_DIR, NET_IPV4_NETFILTER, "netfilter", bin_net_ipv4_netfilter_table }, - - { CTL_INT, NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" }, - { CTL_INT, NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" }, - { CTL_INT, NET_IPV4_TCP_SACK, "tcp_sack" }, - { CTL_INT, NET_IPV4_TCP_RETRANS_COLLAPSE, "tcp_retrans_collapse" }, - { CTL_INT, NET_IPV4_DEFAULT_TTL, "ip_default_ttl" }, - /* NET_IPV4_AUTOCONFIG unused */ - { CTL_INT, NET_IPV4_NO_PMTU_DISC, "ip_no_pmtu_disc" }, - { CTL_INT, NET_IPV4_NONLOCAL_BIND, "ip_nonlocal_bind" }, - { CTL_INT, NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries" }, - { CTL_INT, NET_TCP_SYNACK_RETRIES, "tcp_synack_retries" }, - { CTL_INT, NET_TCP_MAX_ORPHANS, "tcp_max_orphans" }, - { CTL_INT, NET_TCP_MAX_TW_BUCKETS, "tcp_max_tw_buckets" }, - { CTL_INT, NET_IPV4_DYNADDR, "ip_dynaddr" }, - { CTL_INT, NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time" }, - { CTL_INT, NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes" }, - { CTL_INT, NET_IPV4_TCP_KEEPALIVE_INTVL, "tcp_keepalive_intvl" }, - { CTL_INT, NET_IPV4_TCP_RETRIES1, "tcp_retries1" }, - { CTL_INT, NET_IPV4_TCP_RETRIES2, "tcp_retries2" }, - { CTL_INT, NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout" }, - { CTL_INT, NET_TCP_SYNCOOKIES, "tcp_syncookies" }, - { CTL_INT, NET_TCP_TW_RECYCLE, "tcp_tw_recycle" }, - { CTL_INT, NET_TCP_ABORT_ON_OVERFLOW, "tcp_abort_on_overflow" }, - { CTL_INT, NET_TCP_STDURG, "tcp_stdurg" }, - { CTL_INT, NET_TCP_RFC1337, "tcp_rfc1337" }, - { CTL_INT, NET_TCP_MAX_SYN_BACKLOG, "tcp_max_syn_backlog" }, - { CTL_INT, NET_IPV4_LOCAL_PORT_RANGE, "ip_local_port_range" }, - { CTL_INT, NET_IPV4_IGMP_MAX_MEMBERSHIPS, "igmp_max_memberships" }, - { CTL_INT, NET_IPV4_IGMP_MAX_MSF, "igmp_max_msf" }, - { CTL_INT, NET_IPV4_INET_PEER_THRESHOLD, "inet_peer_threshold" }, - { CTL_INT, NET_IPV4_INET_PEER_MINTTL, "inet_peer_minttl" }, - { CTL_INT, NET_IPV4_INET_PEER_MAXTTL, "inet_peer_maxttl" }, - { CTL_INT, NET_IPV4_INET_PEER_GC_MINTIME, "inet_peer_gc_mintime" }, - { CTL_INT, NET_IPV4_INET_PEER_GC_MAXTIME, "inet_peer_gc_maxtime" }, - { CTL_INT, NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries" }, - { CTL_INT, NET_TCP_FACK, "tcp_fack" }, - { CTL_INT, NET_TCP_REORDERING, "tcp_reordering" }, - { CTL_INT, NET_TCP_ECN, "tcp_ecn" }, - { CTL_INT, NET_TCP_DSACK, "tcp_dsack" }, - { CTL_INT, NET_TCP_MEM, "tcp_mem" }, - { CTL_INT, NET_TCP_WMEM, "tcp_wmem" }, - { CTL_INT, NET_TCP_RMEM, "tcp_rmem" }, - { CTL_INT, NET_TCP_APP_WIN, "tcp_app_win" }, - { CTL_INT, NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale" }, - { CTL_INT, NET_TCP_TW_REUSE, "tcp_tw_reuse" }, - { CTL_INT, NET_TCP_FRTO, "tcp_frto" }, - { CTL_INT, NET_TCP_FRTO_RESPONSE, "tcp_frto_response" }, - { CTL_INT, NET_TCP_LOW_LATENCY, "tcp_low_latency" }, - { CTL_INT, NET_TCP_NO_METRICS_SAVE, "tcp_no_metrics_save" }, - { CTL_INT, NET_TCP_MODERATE_RCVBUF, "tcp_moderate_rcvbuf" }, - { CTL_INT, NET_TCP_TSO_WIN_DIVISOR, "tcp_tso_win_divisor" }, - { CTL_STR, NET_TCP_CONG_CONTROL, "tcp_congestion_control" }, - { CTL_INT, NET_TCP_MTU_PROBING, "tcp_mtu_probing" }, - { CTL_INT, NET_TCP_BASE_MSS, "tcp_base_mss" }, - { CTL_INT, NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, "tcp_workaround_signed_windows" }, - { CTL_INT, NET_TCP_SLOW_START_AFTER_IDLE, "tcp_slow_start_after_idle" }, - { CTL_INT, NET_CIPSOV4_CACHE_ENABLE, "cipso_cache_enable" }, - { CTL_INT, NET_CIPSOV4_CACHE_BUCKET_SIZE, "cipso_cache_bucket_size" }, - { CTL_INT, NET_CIPSOV4_RBM_OPTFMT, "cipso_rbm_optfmt" }, - { CTL_INT, NET_CIPSOV4_RBM_STRICTVALID, "cipso_rbm_strictvalid" }, - /* NET_TCP_AVAIL_CONG_CONTROL "tcp_available_congestion_control" no longer used */ - { CTL_STR, NET_TCP_ALLOWED_CONG_CONTROL, "tcp_allowed_congestion_control" }, - { CTL_INT, NET_TCP_MAX_SSTHRESH, "tcp_max_ssthresh" }, - - { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_ALL, "icmp_echo_ignore_all" }, - { CTL_INT, NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, "icmp_echo_ignore_broadcasts" }, - { CTL_INT, NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, "icmp_ignore_bogus_error_responses" }, - { CTL_INT, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, "icmp_errors_use_inbound_ifaddr" }, - { CTL_INT, NET_IPV4_ICMP_RATELIMIT, "icmp_ratelimit" }, - { CTL_INT, NET_IPV4_ICMP_RATEMASK, "icmp_ratemask" }, - - { CTL_INT, NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh" }, - { CTL_INT, NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh" }, - { CTL_INT, NET_IPV4_IPFRAG_TIME, "ipfrag_time" }, - - { CTL_INT, NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" }, - /* NET_IPV4_IPFRAG_MAX_DIST "ipfrag_max_dist" no longer used */ - - { CTL_INT, 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" }, - - /* NET_TCP_DEFAULT_WIN_SCALE unused */ - /* NET_TCP_BIC_BETA unused */ - /* NET_IPV4_TCP_MAX_KA_PROBES unused */ - /* NET_IPV4_IP_MASQ_DEBUG unused */ - /* NET_TCP_SYN_TAILDROP unused */ - /* NET_IPV4_ICMP_SOURCEQUENCH_RATE unused */ - /* NET_IPV4_ICMP_DESTUNREACH_RATE unused */ - /* NET_IPV4_ICMP_TIMEEXCEED_RATE unused */ - /* NET_IPV4_ICMP_PARAMPROB_RATE unused */ - /* NET_IPV4_ICMP_ECHOREPLY_RATE unused */ - /* NET_IPV4_ALWAYS_DEFRAG unused */ - {} -}; - -static const struct bin_table bin_net_ipx_table[] = { - { CTL_INT, NET_IPX_PPROP_BROADCASTING, "ipx_pprop_broadcasting" }, - /* NET_IPX_FORWARDING unused */ - {} -}; - -static const struct bin_table bin_net_atalk_table[] = { - { CTL_INT, NET_ATALK_AARP_EXPIRY_TIME, "aarp-expiry-time" }, - { CTL_INT, NET_ATALK_AARP_TICK_TIME, "aarp-tick-time" }, - { CTL_INT, NET_ATALK_AARP_RETRANSMIT_LIMIT, "aarp-retransmit-limit" }, - { CTL_INT, NET_ATALK_AARP_RESOLVE_TIME, "aarp-resolve-time" }, - {}, -}; - -static const struct bin_table bin_net_netrom_table[] = { - { CTL_INT, NET_NETROM_DEFAULT_PATH_QUALITY, "default_path_quality" }, - { CTL_INT, NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, "obsolescence_count_initialiser" }, - { CTL_INT, NET_NETROM_NETWORK_TTL_INITIALISER, "network_ttl_initialiser" }, - { CTL_INT, NET_NETROM_TRANSPORT_TIMEOUT, "transport_timeout" }, - { CTL_INT, NET_NETROM_TRANSPORT_MAXIMUM_TRIES, "transport_maximum_tries" }, - { CTL_INT, NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, "transport_acknowledge_delay" }, - { CTL_INT, NET_NETROM_TRANSPORT_BUSY_DELAY, "transport_busy_delay" }, - { CTL_INT, NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, "transport_requested_window_size" }, - { CTL_INT, NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, "transport_no_activity_timeout" }, - { CTL_INT, NET_NETROM_ROUTING_CONTROL, "routing_control" }, - { CTL_INT, NET_NETROM_LINK_FAILS_COUNT, "link_fails_count" }, - { CTL_INT, NET_NETROM_RESET, "reset" }, - {} -}; - -static const struct bin_table bin_net_ax25_param_table[] = { - { CTL_INT, NET_AX25_IP_DEFAULT_MODE, "ip_default_mode" }, - { CTL_INT, NET_AX25_DEFAULT_MODE, "ax25_default_mode" }, - { CTL_INT, NET_AX25_BACKOFF_TYPE, "backoff_type" }, - { CTL_INT, NET_AX25_CONNECT_MODE, "connect_mode" }, - { CTL_INT, NET_AX25_STANDARD_WINDOW, "standard_window_size" }, - { CTL_INT, NET_AX25_EXTENDED_WINDOW, "extended_window_size" }, - { CTL_INT, NET_AX25_T1_TIMEOUT, "t1_timeout" }, - { CTL_INT, NET_AX25_T2_TIMEOUT, "t2_timeout" }, - { CTL_INT, NET_AX25_T3_TIMEOUT, "t3_timeout" }, - { CTL_INT, NET_AX25_IDLE_TIMEOUT, "idle_timeout" }, - { CTL_INT, NET_AX25_N2, "maximum_retry_count" }, - { CTL_INT, NET_AX25_PACLEN, "maximum_packet_length" }, - { CTL_INT, NET_AX25_PROTOCOL, "protocol" }, - { CTL_INT, NET_AX25_DAMA_SLAVE_TIMEOUT, "dama_slave_timeout" }, - {} -}; - -static const struct bin_table bin_net_ax25_table[] = { - { CTL_DIR, 0, NULL, bin_net_ax25_param_table }, - {} -}; - -static const struct bin_table bin_net_rose_table[] = { - { CTL_INT, NET_ROSE_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, - { CTL_INT, NET_ROSE_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, - { CTL_INT, NET_ROSE_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, - { CTL_INT, NET_ROSE_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" }, - { CTL_INT, NET_ROSE_ACK_HOLD_BACK_TIMEOUT, "acknowledge_hold_back_timeout" }, - { CTL_INT, NET_ROSE_ROUTING_CONTROL, "routing_control" }, - { CTL_INT, NET_ROSE_LINK_FAIL_TIMEOUT, "link_fail_timeout" }, - { CTL_INT, NET_ROSE_MAX_VCS, "maximum_virtual_circuits" }, - { CTL_INT, NET_ROSE_WINDOW_SIZE, "window_size" }, - { CTL_INT, NET_ROSE_NO_ACTIVITY_TIMEOUT, "no_activity_timeout" }, - {} -}; - -static const struct bin_table bin_net_ipv6_conf_var_table[] = { - { CTL_INT, NET_IPV6_FORWARDING, "forwarding" }, - { CTL_INT, NET_IPV6_HOP_LIMIT, "hop_limit" }, - { CTL_INT, NET_IPV6_MTU, "mtu" }, - { CTL_INT, NET_IPV6_ACCEPT_RA, "accept_ra" }, - { CTL_INT, NET_IPV6_ACCEPT_REDIRECTS, "accept_redirects" }, - { CTL_INT, NET_IPV6_AUTOCONF, "autoconf" }, - { CTL_INT, NET_IPV6_DAD_TRANSMITS, "dad_transmits" }, - { CTL_INT, NET_IPV6_RTR_SOLICITS, "router_solicitations" }, - { CTL_INT, NET_IPV6_RTR_SOLICIT_INTERVAL, "router_solicitation_interval" }, - { CTL_INT, NET_IPV6_RTR_SOLICIT_DELAY, "router_solicitation_delay" }, - { CTL_INT, NET_IPV6_USE_TEMPADDR, "use_tempaddr" }, - { CTL_INT, NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft" }, - { CTL_INT, NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft" }, - { CTL_INT, NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry" }, - { CTL_INT, NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor" }, - { CTL_INT, NET_IPV6_MAX_ADDRESSES, "max_addresses" }, - { CTL_INT, NET_IPV6_FORCE_MLD_VERSION, "force_mld_version" }, - { CTL_INT, NET_IPV6_ACCEPT_RA_DEFRTR, "accept_ra_defrtr" }, - { CTL_INT, NET_IPV6_ACCEPT_RA_PINFO, "accept_ra_pinfo" }, - { CTL_INT, NET_IPV6_ACCEPT_RA_RTR_PREF, "accept_ra_rtr_pref" }, - { CTL_INT, NET_IPV6_RTR_PROBE_INTERVAL, "router_probe_interval" }, - { CTL_INT, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, - { CTL_INT, NET_IPV6_PROXY_NDP, "proxy_ndp" }, - { CTL_INT, NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, - { CTL_INT, NET_IPV6_ACCEPT_RA_FROM_LOCAL, "accept_ra_from_local" }, - {} -}; - -static const struct bin_table bin_net_ipv6_conf_table[] = { - { CTL_DIR, NET_PROTO_CONF_ALL, "all", bin_net_ipv6_conf_var_table }, - { CTL_DIR, NET_PROTO_CONF_DEFAULT, "default", bin_net_ipv6_conf_var_table }, - { CTL_DIR, 0, NULL, bin_net_ipv6_conf_var_table }, - {} -}; - -static const struct bin_table bin_net_ipv6_route_table[] = { - /* NET_IPV6_ROUTE_FLUSH "flush" no longer used */ - { CTL_INT, NET_IPV6_ROUTE_GC_THRESH, "gc_thresh" }, - { CTL_INT, NET_IPV6_ROUTE_MAX_SIZE, "max_size" }, - { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" }, - { CTL_INT, NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout" }, - { CTL_INT, NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval" }, - { CTL_INT, NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity" }, - { CTL_INT, NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires" }, - { CTL_INT, NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss" }, - { CTL_INT, NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" }, - {} -}; - -static const struct bin_table bin_net_ipv6_icmp_table[] = { - { CTL_INT, NET_IPV6_ICMP_RATELIMIT, "ratelimit" }, - {} -}; - -static const struct bin_table bin_net_ipv6_table[] = { - { CTL_DIR, NET_IPV6_CONF, "conf", bin_net_ipv6_conf_table }, - { CTL_DIR, NET_IPV6_NEIGH, "neigh", bin_net_neigh_table }, - { CTL_DIR, NET_IPV6_ROUTE, "route", bin_net_ipv6_route_table }, - { CTL_DIR, NET_IPV6_ICMP, "icmp", bin_net_ipv6_icmp_table }, - { CTL_INT, NET_IPV6_BINDV6ONLY, "bindv6only" }, - { CTL_INT, NET_IPV6_IP6FRAG_HIGH_THRESH, "ip6frag_high_thresh" }, - { CTL_INT, NET_IPV6_IP6FRAG_LOW_THRESH, "ip6frag_low_thresh" }, - { CTL_INT, NET_IPV6_IP6FRAG_TIME, "ip6frag_time" }, - { CTL_INT, NET_IPV6_IP6FRAG_SECRET_INTERVAL, "ip6frag_secret_interval" }, - { CTL_INT, NET_IPV6_MLD_MAX_MSF, "mld_max_msf" }, - { CTL_INT, 2088 /* IPQ_QMAX */, "ip6_queue_maxlen" }, - {} -}; - -static const struct bin_table bin_net_x25_table[] = { - { CTL_INT, NET_X25_RESTART_REQUEST_TIMEOUT, "restart_request_timeout" }, - { CTL_INT, NET_X25_CALL_REQUEST_TIMEOUT, "call_request_timeout" }, - { CTL_INT, NET_X25_RESET_REQUEST_TIMEOUT, "reset_request_timeout" }, - { CTL_INT, NET_X25_CLEAR_REQUEST_TIMEOUT, "clear_request_timeout" }, - { CTL_INT, NET_X25_ACK_HOLD_BACK_TIMEOUT, "acknowledgement_hold_back_timeout" }, - { CTL_INT, NET_X25_FORWARD, "x25_forward" }, - {} -}; - -static const struct bin_table bin_net_tr_table[] = { - { CTL_INT, NET_TR_RIF_TIMEOUT, "rif_timeout" }, - {} -}; - - -static const struct bin_table bin_net_decnet_conf_vars[] = { - { CTL_INT, NET_DECNET_CONF_DEV_FORWARDING, "forwarding" }, - { CTL_INT, NET_DECNET_CONF_DEV_PRIORITY, "priority" }, - { CTL_INT, NET_DECNET_CONF_DEV_T2, "t2" }, - { CTL_INT, NET_DECNET_CONF_DEV_T3, "t3" }, - {} -}; - -static const struct bin_table bin_net_decnet_conf[] = { - { CTL_DIR, NET_DECNET_CONF_ETHER, "ethernet", bin_net_decnet_conf_vars }, - { CTL_DIR, NET_DECNET_CONF_GRE, "ipgre", bin_net_decnet_conf_vars }, - { CTL_DIR, NET_DECNET_CONF_X25, "x25", bin_net_decnet_conf_vars }, - { CTL_DIR, NET_DECNET_CONF_PPP, "ppp", bin_net_decnet_conf_vars }, - { CTL_DIR, NET_DECNET_CONF_DDCMP, "ddcmp", bin_net_decnet_conf_vars }, - { CTL_DIR, NET_DECNET_CONF_LOOPBACK, "loopback", bin_net_decnet_conf_vars }, - { CTL_DIR, 0, NULL, bin_net_decnet_conf_vars }, - {} -}; - -static const struct bin_table bin_net_decnet_table[] = { - { CTL_DIR, NET_DECNET_CONF, "conf", bin_net_decnet_conf }, - { CTL_DNADR, NET_DECNET_NODE_ADDRESS, "node_address" }, - { CTL_STR, NET_DECNET_NODE_NAME, "node_name" }, - { CTL_STR, NET_DECNET_DEFAULT_DEVICE, "default_device" }, - { CTL_INT, NET_DECNET_TIME_WAIT, "time_wait" }, - { CTL_INT, NET_DECNET_DN_COUNT, "dn_count" }, - { CTL_INT, NET_DECNET_DI_COUNT, "di_count" }, - { CTL_INT, NET_DECNET_DR_COUNT, "dr_count" }, - { CTL_INT, NET_DECNET_DST_GC_INTERVAL, "dst_gc_interval" }, - { CTL_INT, NET_DECNET_NO_FC_MAX_CWND, "no_fc_max_cwnd" }, - { CTL_INT, NET_DECNET_MEM, "decnet_mem" }, - { CTL_INT, NET_DECNET_RMEM, "decnet_rmem" }, - { CTL_INT, NET_DECNET_WMEM, "decnet_wmem" }, - { CTL_INT, NET_DECNET_DEBUG_LEVEL, "debug" }, - {} -}; - -static const struct bin_table bin_net_sctp_table[] = { - { CTL_INT, NET_SCTP_RTO_INITIAL, "rto_initial" }, - { CTL_INT, NET_SCTP_RTO_MIN, "rto_min" }, - { CTL_INT, NET_SCTP_RTO_MAX, "rto_max" }, - { CTL_INT, NET_SCTP_RTO_ALPHA, "rto_alpha_exp_divisor" }, - { CTL_INT, NET_SCTP_RTO_BETA, "rto_beta_exp_divisor" }, - { CTL_INT, NET_SCTP_VALID_COOKIE_LIFE, "valid_cookie_life" }, - { CTL_INT, NET_SCTP_ASSOCIATION_MAX_RETRANS, "association_max_retrans" }, - { CTL_INT, NET_SCTP_PATH_MAX_RETRANS, "path_max_retrans" }, - { CTL_INT, NET_SCTP_MAX_INIT_RETRANSMITS, "max_init_retransmits" }, - { CTL_INT, NET_SCTP_HB_INTERVAL, "hb_interval" }, - { CTL_INT, NET_SCTP_PRESERVE_ENABLE, "cookie_preserve_enable" }, - { CTL_INT, NET_SCTP_MAX_BURST, "max_burst" }, - { CTL_INT, NET_SCTP_ADDIP_ENABLE, "addip_enable" }, - { CTL_INT, NET_SCTP_PRSCTP_ENABLE, "prsctp_enable" }, - { CTL_INT, NET_SCTP_SNDBUF_POLICY, "sndbuf_policy" }, - { CTL_INT, NET_SCTP_SACK_TIMEOUT, "sack_timeout" }, - { CTL_INT, NET_SCTP_RCVBUF_POLICY, "rcvbuf_policy" }, - {} -}; - -static const struct bin_table bin_net_llc_llc2_timeout_table[] = { - { CTL_INT, NET_LLC2_ACK_TIMEOUT, "ack" }, - { CTL_INT, NET_LLC2_P_TIMEOUT, "p" }, - { CTL_INT, NET_LLC2_REJ_TIMEOUT, "rej" }, - { CTL_INT, NET_LLC2_BUSY_TIMEOUT, "busy" }, - {} -}; - -static const struct bin_table bin_net_llc_station_table[] = { - { CTL_INT, NET_LLC_STATION_ACK_TIMEOUT, "ack_timeout" }, - {} -}; - -static const struct bin_table bin_net_llc_llc2_table[] = { - { CTL_DIR, NET_LLC2, "timeout", bin_net_llc_llc2_timeout_table }, - {} -}; - -static const struct bin_table bin_net_llc_table[] = { - { CTL_DIR, NET_LLC2, "llc2", bin_net_llc_llc2_table }, - { CTL_DIR, NET_LLC_STATION, "station", bin_net_llc_station_table }, - {} -}; - -static const struct bin_table bin_net_netfilter_table[] = { - { CTL_INT, NET_NF_CONNTRACK_MAX, "nf_conntrack_max" }, - /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT "nf_conntrack_tcp_timeout_syn_sent" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV "nf_conntrack_tcp_timeout_syn_recv" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED "nf_conntrack_tcp_timeout_established" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT "nf_conntrack_tcp_timeout_fin_wait" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT "nf_conntrack_tcp_timeout_close_wait" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK "nf_conntrack_tcp_timeout_last_ack" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT "nf_conntrack_tcp_timeout_time_wait" no longer used */ - /* NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE "nf_conntrack_tcp_timeout_close" no longer used */ - /* NET_NF_CONNTRACK_UDP_TIMEOUT "nf_conntrack_udp_timeout" no longer used */ - /* NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM "nf_conntrack_udp_timeout_stream" no longer used */ - /* NET_NF_CONNTRACK_ICMP_TIMEOUT "nf_conntrack_icmp_timeout" no longer used */ - /* NET_NF_CONNTRACK_GENERIC_TIMEOUT "nf_conntrack_generic_timeout" no longer used */ - { CTL_INT, NET_NF_CONNTRACK_BUCKETS, "nf_conntrack_buckets" }, - { CTL_INT, NET_NF_CONNTRACK_LOG_INVALID, "nf_conntrack_log_invalid" }, - /* NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS "nf_conntrack_tcp_timeout_max_retrans" no longer used */ - { CTL_INT, NET_NF_CONNTRACK_TCP_LOOSE, "nf_conntrack_tcp_loose" }, - { CTL_INT, NET_NF_CONNTRACK_TCP_BE_LIBERAL, "nf_conntrack_tcp_be_liberal" }, - { CTL_INT, NET_NF_CONNTRACK_TCP_MAX_RETRANS, "nf_conntrack_tcp_max_retrans" }, - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED "nf_conntrack_sctp_timeout_closed" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT "nf_conntrack_sctp_timeout_cookie_wait" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED "nf_conntrack_sctp_timeout_cookie_echoed" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED "nf_conntrack_sctp_timeout_established" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT "nf_conntrack_sctp_timeout_shutdown_sent" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD "nf_conntrack_sctp_timeout_shutdown_recd" no longer used */ - /* NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT "nf_conntrack_sctp_timeout_shutdown_ack_sent" no longer used */ - { CTL_INT, NET_NF_CONNTRACK_COUNT, "nf_conntrack_count" }, - /* NET_NF_CONNTRACK_ICMPV6_TIMEOUT "nf_conntrack_icmpv6_timeout" no longer used */ - /* NET_NF_CONNTRACK_FRAG6_TIMEOUT "nf_conntrack_frag6_timeout" no longer used */ - { CTL_INT, NET_NF_CONNTRACK_FRAG6_LOW_THRESH, "nf_conntrack_frag6_low_thresh" }, - { CTL_INT, NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, "nf_conntrack_frag6_high_thresh" }, - { CTL_INT, NET_NF_CONNTRACK_CHECKSUM, "nf_conntrack_checksum" }, - - {} -}; - -static const struct bin_table bin_net_table[] = { - { CTL_DIR, NET_CORE, "core", bin_net_core_table }, - /* NET_ETHER not used */ - /* NET_802 not used */ - { CTL_DIR, NET_UNIX, "unix", bin_net_unix_table }, - { CTL_DIR, NET_IPV4, "ipv4", bin_net_ipv4_table }, - { CTL_DIR, NET_IPX, "ipx", bin_net_ipx_table }, - { CTL_DIR, NET_ATALK, "appletalk", bin_net_atalk_table }, - { CTL_DIR, NET_NETROM, "netrom", bin_net_netrom_table }, - { CTL_DIR, NET_AX25, "ax25", bin_net_ax25_table }, - /* NET_BRIDGE "bridge" no longer used */ - { CTL_DIR, NET_ROSE, "rose", bin_net_rose_table }, - { CTL_DIR, NET_IPV6, "ipv6", bin_net_ipv6_table }, - { CTL_DIR, NET_X25, "x25", bin_net_x25_table }, - { CTL_DIR, NET_TR, "token-ring", bin_net_tr_table }, - { CTL_DIR, NET_DECNET, "decnet", bin_net_decnet_table }, - /* NET_ECONET not used */ - { CTL_DIR, NET_SCTP, "sctp", bin_net_sctp_table }, - { CTL_DIR, NET_LLC, "llc", bin_net_llc_table }, - { CTL_DIR, NET_NETFILTER, "netfilter", bin_net_netfilter_table }, - /* NET_DCCP "dccp" no longer used */ - /* NET_IRDA "irda" no longer used */ - { CTL_INT, 2089, "nf_conntrack_max" }, - {} -}; - -static const struct bin_table bin_fs_quota_table[] = { - { CTL_INT, FS_DQ_LOOKUPS, "lookups" }, - { CTL_INT, FS_DQ_DROPS, "drops" }, - { CTL_INT, FS_DQ_READS, "reads" }, - { CTL_INT, FS_DQ_WRITES, "writes" }, - { CTL_INT, FS_DQ_CACHE_HITS, "cache_hits" }, - { CTL_INT, FS_DQ_ALLOCATED, "allocated_dquots" }, - { CTL_INT, FS_DQ_FREE, "free_dquots" }, - { CTL_INT, FS_DQ_SYNCS, "syncs" }, - { CTL_INT, FS_DQ_WARNINGS, "warnings" }, - {} -}; - -static const struct bin_table bin_fs_xfs_table[] = { - { CTL_INT, XFS_SGID_INHERIT, "irix_sgid_inherit" }, - { CTL_INT, XFS_SYMLINK_MODE, "irix_symlink_mode" }, - { CTL_INT, XFS_PANIC_MASK, "panic_mask" }, - - { CTL_INT, XFS_ERRLEVEL, "error_level" }, - { CTL_INT, XFS_SYNCD_TIMER, "xfssyncd_centisecs" }, - { CTL_INT, XFS_INHERIT_SYNC, "inherit_sync" }, - { CTL_INT, XFS_INHERIT_NODUMP, "inherit_nodump" }, - { CTL_INT, XFS_INHERIT_NOATIME, "inherit_noatime" }, - { CTL_INT, XFS_BUF_TIMER, "xfsbufd_centisecs" }, - { CTL_INT, XFS_BUF_AGE, "age_buffer_centisecs" }, - { CTL_INT, XFS_INHERIT_NOSYM, "inherit_nosymlinks" }, - { CTL_INT, XFS_ROTORSTEP, "rotorstep" }, - { CTL_INT, XFS_INHERIT_NODFRG, "inherit_nodefrag" }, - { CTL_INT, XFS_FILESTREAM_TIMER, "filestream_centisecs" }, - { CTL_INT, XFS_STATS_CLEAR, "stats_clear" }, - {} -}; - -static const struct bin_table bin_fs_ocfs2_nm_table[] = { - { CTL_STR, 1, "hb_ctl_path" }, - {} -}; - -static const struct bin_table bin_fs_ocfs2_table[] = { - { CTL_DIR, 1, "nm", bin_fs_ocfs2_nm_table }, - {} -}; - -static const struct bin_table bin_inotify_table[] = { - { CTL_INT, INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, - { CTL_INT, INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, - { CTL_INT, INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, - {} -}; - -static const struct bin_table bin_fs_table[] = { - { CTL_INT, FS_NRINODE, "inode-nr" }, - { CTL_INT, FS_STATINODE, "inode-state" }, - /* FS_MAXINODE unused */ - /* FS_NRDQUOT unused */ - /* FS_MAXDQUOT unused */ - /* FS_NRFILE "file-nr" no longer used */ - { CTL_INT, FS_MAXFILE, "file-max" }, - { CTL_INT, FS_DENTRY, "dentry-state" }, - /* FS_NRSUPER unused */ - /* FS_MAXUPSER unused */ - { CTL_INT, FS_OVERFLOWUID, "overflowuid" }, - { CTL_INT, FS_OVERFLOWGID, "overflowgid" }, - { CTL_INT, FS_LEASES, "leases-enable" }, - { CTL_INT, FS_DIR_NOTIFY, "dir-notify-enable" }, - { CTL_INT, FS_LEASE_TIME, "lease-break-time" }, - { CTL_DIR, FS_DQSTATS, "quota", bin_fs_quota_table }, - { CTL_DIR, FS_XFS, "xfs", bin_fs_xfs_table }, - { CTL_ULONG, FS_AIO_NR, "aio-nr" }, - { CTL_ULONG, FS_AIO_MAX_NR, "aio-max-nr" }, - { CTL_DIR, FS_INOTIFY, "inotify", bin_inotify_table }, - { CTL_DIR, FS_OCFS2, "ocfs2", bin_fs_ocfs2_table }, - { CTL_INT, KERN_SETUID_DUMPABLE, "suid_dumpable" }, - {} -}; - -static const struct bin_table bin_ipmi_table[] = { - { CTL_INT, DEV_IPMI_POWEROFF_POWERCYCLE, "poweroff_powercycle" }, - {} -}; - -static const struct bin_table bin_mac_hid_files[] = { - /* DEV_MAC_HID_KEYBOARD_SENDS_LINUX_KEYCODES unused */ - /* DEV_MAC_HID_KEYBOARD_LOCK_KEYCODES unused */ - { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON_EMULATION, "mouse_button_emulation" }, - { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON2_KEYCODE, "mouse_button2_keycode" }, - { CTL_INT, DEV_MAC_HID_MOUSE_BUTTON3_KEYCODE, "mouse_button3_keycode" }, - /* DEV_MAC_HID_ADB_MOUSE_SENDS_KEYCODES unused */ - {} -}; - -static const struct bin_table bin_raid_table[] = { - { CTL_INT, DEV_RAID_SPEED_LIMIT_MIN, "speed_limit_min" }, - { CTL_INT, DEV_RAID_SPEED_LIMIT_MAX, "speed_limit_max" }, - {} -}; - -static const struct bin_table bin_scsi_table[] = { - { CTL_INT, DEV_SCSI_LOGGING_LEVEL, "logging_level" }, - {} -}; - -static const struct bin_table bin_dev_table[] = { - /* DEV_CDROM "cdrom" no longer used */ - /* DEV_HWMON unused */ - /* DEV_PARPORT "parport" no longer used */ - { CTL_DIR, DEV_RAID, "raid", bin_raid_table }, - { CTL_DIR, DEV_MAC_HID, "mac_hid", bin_mac_hid_files }, - { CTL_DIR, DEV_SCSI, "scsi", bin_scsi_table }, - { CTL_DIR, DEV_IPMI, "ipmi", bin_ipmi_table }, - {} -}; - -static const struct bin_table bin_bus_isa_table[] = { - { CTL_INT, BUS_ISA_MEM_BASE, "membase" }, - { CTL_INT, BUS_ISA_PORT_BASE, "portbase" }, - { CTL_INT, BUS_ISA_PORT_SHIFT, "portshift" }, - {} -}; - -static const struct bin_table bin_bus_table[] = { - { CTL_DIR, CTL_BUS_ISA, "isa", bin_bus_isa_table }, - {} -}; - - -static const struct bin_table bin_s390dbf_table[] = { - { CTL_INT, 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, - { CTL_INT, 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, - {} -}; - -static const struct bin_table bin_sunrpc_table[] = { - /* CTL_RPCDEBUG "rpc_debug" no longer used */ - /* CTL_NFSDEBUG "nfs_debug" no longer used */ - /* CTL_NFSDDEBUG "nfsd_debug" no longer used */ - /* CTL_NLMDEBUG "nlm_debug" no longer used */ - - { CTL_INT, CTL_SLOTTABLE_UDP, "udp_slot_table_entries" }, - { CTL_INT, CTL_SLOTTABLE_TCP, "tcp_slot_table_entries" }, - { CTL_INT, CTL_MIN_RESVPORT, "min_resvport" }, - { CTL_INT, CTL_MAX_RESVPORT, "max_resvport" }, - {} -}; - -static const struct bin_table bin_pm_table[] = { - /* frv specific */ - /* 1 == CTL_PM_SUSPEND "suspend" no longer used" */ - { CTL_INT, 2 /* CTL_PM_CMODE */, "cmode" }, - { CTL_INT, 3 /* CTL_PM_P0 */, "p0" }, - { CTL_INT, 4 /* CTL_PM_CM */, "cm" }, - {} -}; - -static const struct bin_table bin_root_table[] = { - { CTL_DIR, CTL_KERN, "kernel", bin_kern_table }, - { CTL_DIR, CTL_VM, "vm", bin_vm_table }, - { CTL_DIR, CTL_NET, "net", bin_net_table }, - /* CTL_PROC not used */ - { CTL_DIR, CTL_FS, "fs", bin_fs_table }, - /* CTL_DEBUG "debug" no longer used */ - { CTL_DIR, CTL_DEV, "dev", bin_dev_table }, - { CTL_DIR, CTL_BUS, "bus", bin_bus_table }, - { CTL_DIR, CTL_ABI, "abi" }, - /* CTL_CPU not used */ - /* CTL_ARLAN "arlan" no longer used */ - { CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table }, - { CTL_DIR, CTL_SUNRPC, "sunrpc", bin_sunrpc_table }, - { CTL_DIR, CTL_PM, "pm", bin_pm_table }, - {} -}; - -static ssize_t bin_dir(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - return -ENOTDIR; -} - - -static ssize_t bin_string(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - ssize_t result, copied = 0; - - if (oldval && oldlen) { - char __user *lastp; - loff_t pos = 0; - int ch; - - result = vfs_read(file, oldval, oldlen, &pos); - if (result < 0) - goto out; - - copied = result; - lastp = oldval + copied - 1; - - result = -EFAULT; - if (get_user(ch, lastp)) - goto out; - - /* Trim off the trailing newline */ - if (ch == '\n') { - result = -EFAULT; - if (put_user('\0', lastp)) - goto out; - copied -= 1; - } - } - - if (newval && newlen) { - loff_t pos = 0; - - result = vfs_write(file, newval, newlen, &pos); - if (result < 0) - goto out; - } - - result = copied; -out: - return result; -} - -static ssize_t bin_intvec(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - ssize_t copied = 0; - char *buffer; - ssize_t result; - - result = -ENOMEM; - buffer = kmalloc(BUFSZ, GFP_KERNEL); - if (!buffer) - goto out; - - if (oldval && oldlen) { - unsigned __user *vec = oldval; - size_t length = oldlen / sizeof(*vec); - char *str, *end; - int i; - loff_t pos = 0; - - result = kernel_read(file, buffer, BUFSZ - 1, &pos); - if (result < 0) - goto out_kfree; - - str = buffer; - end = str + result; - *end++ = '\0'; - for (i = 0; i < length; i++) { - unsigned long value; - - value = simple_strtoul(str, &str, 10); - while (isspace(*str)) - str++; - - result = -EFAULT; - if (put_user(value, vec + i)) - goto out_kfree; - - copied += sizeof(*vec); - if (!isdigit(*str)) - break; - } - } - - if (newval && newlen) { - unsigned __user *vec = newval; - size_t length = newlen / sizeof(*vec); - char *str, *end; - int i; - loff_t pos = 0; - - str = buffer; - end = str + BUFSZ; - for (i = 0; i < length; i++) { - unsigned long value; - - result = -EFAULT; - if (get_user(value, vec + i)) - goto out_kfree; - - str += scnprintf(str, end - str, "%lu\t", value); - } - - result = kernel_write(file, buffer, str - buffer, &pos); - if (result < 0) - goto out_kfree; - } - result = copied; -out_kfree: - kfree(buffer); -out: - return result; -} - -static ssize_t bin_ulongvec(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - ssize_t copied = 0; - char *buffer; - ssize_t result; - - result = -ENOMEM; - buffer = kmalloc(BUFSZ, GFP_KERNEL); - if (!buffer) - goto out; - - if (oldval && oldlen) { - unsigned long __user *vec = oldval; - size_t length = oldlen / sizeof(*vec); - char *str, *end; - int i; - loff_t pos = 0; - - result = kernel_read(file, buffer, BUFSZ - 1, &pos); - if (result < 0) - goto out_kfree; - - str = buffer; - end = str + result; - *end++ = '\0'; - for (i = 0; i < length; i++) { - unsigned long value; - - value = simple_strtoul(str, &str, 10); - while (isspace(*str)) - str++; - - result = -EFAULT; - if (put_user(value, vec + i)) - goto out_kfree; - - copied += sizeof(*vec); - if (!isdigit(*str)) - break; - } - } - - if (newval && newlen) { - unsigned long __user *vec = newval; - size_t length = newlen / sizeof(*vec); - char *str, *end; - int i; - loff_t pos = 0; - - str = buffer; - end = str + BUFSZ; - for (i = 0; i < length; i++) { - unsigned long value; - - result = -EFAULT; - if (get_user(value, vec + i)) - goto out_kfree; - - str += scnprintf(str, end - str, "%lu\t", value); - } - - result = kernel_write(file, buffer, str - buffer, &pos); - if (result < 0) - goto out_kfree; - } - result = copied; -out_kfree: - kfree(buffer); -out: - return result; -} - -static ssize_t bin_uuid(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - ssize_t result, copied = 0; - - /* Only supports reads */ - if (oldval && oldlen) { - char buf[UUID_STRING_LEN + 1]; - uuid_t uuid; - loff_t pos = 0; - - result = kernel_read(file, buf, sizeof(buf) - 1, &pos); - if (result < 0) - goto out; - - buf[result] = '\0'; - - result = -EIO; - if (uuid_parse(buf, &uuid)) - goto out; - - if (oldlen > 16) - oldlen = 16; - - result = -EFAULT; - if (copy_to_user(oldval, &uuid, oldlen)) - goto out; - - copied = oldlen; - } - result = copied; -out: - return result; -} - -static ssize_t bin_dn_node_address(struct file *file, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - ssize_t result, copied = 0; - - if (oldval && oldlen) { - char buf[15], *nodep; - unsigned long area, node; - __le16 dnaddr; - loff_t pos = 0; - - result = kernel_read(file, buf, sizeof(buf) - 1, &pos); - if (result < 0) - goto out; - - buf[result] = '\0'; - - /* Convert the decnet address to binary */ - result = -EIO; - nodep = strchr(buf, '.'); - if (!nodep) - goto out; - ++nodep; - - area = simple_strtoul(buf, NULL, 10); - node = simple_strtoul(nodep, NULL, 10); - - result = -EIO; - if ((area > 63)||(node > 1023)) - goto out; - - dnaddr = cpu_to_le16((area << 10) | node); - - result = -EFAULT; - if (put_user(dnaddr, (__le16 __user *)oldval)) - goto out; - - copied = sizeof(dnaddr); - } - - if (newval && newlen) { - __le16 dnaddr; - char buf[15]; - int len; - loff_t pos = 0; - - result = -EINVAL; - if (newlen != sizeof(dnaddr)) - goto out; - - result = -EFAULT; - if (get_user(dnaddr, (__le16 __user *)newval)) - goto out; - - len = scnprintf(buf, sizeof(buf), "%hu.%hu", - le16_to_cpu(dnaddr) >> 10, - le16_to_cpu(dnaddr) & 0x3ff); - - result = kernel_write(file, buf, len, &pos); - if (result < 0) - goto out; - } - - result = copied; -out: - return result; -} - -static const struct bin_table *get_sysctl(const int *name, int nlen, char *path) -{ - const struct bin_table *table = &bin_root_table[0]; - int ctl_name; - - /* The binary sysctl tables have a small maximum depth so - * there is no danger of overflowing our path as it PATH_MAX - * bytes long. - */ - memcpy(path, "sys/", 4); - path += 4; - -repeat: - if (!nlen) - return ERR_PTR(-ENOTDIR); - ctl_name = *name; - name++; - nlen--; - for ( ; table->convert; table++) { - int len = 0; - - /* - * For a wild card entry map from ifindex to network - * device name. - */ - if (!table->ctl_name) { -#ifdef CONFIG_NET - struct net *net = current->nsproxy->net_ns; - struct net_device *dev; - dev = dev_get_by_index(net, ctl_name); - if (dev) { - len = strlen(dev->name); - memcpy(path, dev->name, len); - dev_put(dev); - } -#endif - /* Use the well known sysctl number to proc name mapping */ - } else if (ctl_name == table->ctl_name) { - len = strlen(table->procname); - memcpy(path, table->procname, len); - } - if (len) { - path += len; - if (table->child) { - *path++ = '/'; - table = table->child; - goto repeat; - } - *path = '\0'; - return table; - } - } - return ERR_PTR(-ENOTDIR); -} - -static char *sysctl_getname(const int *name, int nlen, const struct bin_table **tablep) -{ - char *tmp, *result; - - result = ERR_PTR(-ENOMEM); - tmp = __getname(); - if (tmp) { - const struct bin_table *table = get_sysctl(name, nlen, tmp); - result = tmp; - *tablep = table; - if (IS_ERR(table)) { - __putname(tmp); - result = ERR_CAST(table); - } - } - return result; -} - -static ssize_t binary_sysctl(const int *name, int nlen, - void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) -{ - const struct bin_table *table = NULL; - struct vfsmount *mnt; - struct file *file; - ssize_t result; - char *pathname; - int flags; - - pathname = sysctl_getname(name, nlen, &table); - result = PTR_ERR(pathname); - if (IS_ERR(pathname)) - goto out; - - /* How should the sysctl be accessed? */ - if (oldval && oldlen && newval && newlen) { - flags = O_RDWR; - } else if (newval && newlen) { - flags = O_WRONLY; - } else if (oldval && oldlen) { - flags = O_RDONLY; - } else { - result = 0; - goto out_putname; - } - - mnt = task_active_pid_ns(current)->proc_mnt; - file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0); - result = PTR_ERR(file); - if (IS_ERR(file)) - goto out_putname; - - result = table->convert(file, oldval, oldlen, newval, newlen); - - fput(file); -out_putname: - __putname(pathname); -out: - return result; -} - - -#else /* CONFIG_SYSCTL_SYSCALL */ - static ssize_t binary_sysctl(const int *name, int nlen, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { return -ENOSYS; } -#endif /* CONFIG_SYSCTL_SYSCALL */ - - static void deprecated_sysctl_warning(const int *name, int nlen) { int i; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 65605530ee34..8de90ea31280 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -966,7 +966,8 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } @@ -988,7 +989,8 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_cpu_base *cpu_base = base->cpu_base; u8 state = timer->state; - timer->state = newstate; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, newstate); if (!(state & HRTIMER_STATE_ENQUEUED)) return; @@ -1013,8 +1015,9 @@ static void __remove_hrtimer(struct hrtimer *timer, static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) { - if (hrtimer_is_queued(timer)) { - u8 state = timer->state; + u8 state = timer->state; + + if (state & HRTIMER_STATE_ENQUEUED) { int reprogram; /* @@ -1940,7 +1943,7 @@ out: return ret; } -#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +#ifdef CONFIG_64BIT SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 77f1e5635cc1..9e59c9ea92aa 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -26,7 +26,7 @@ * Returns the delta between the expiry time and now, which can be * less than zero or 1usec for an pending expired timer */ -static struct timeval itimer_get_remtime(struct hrtimer *timer) +static struct timespec64 itimer_get_remtime(struct hrtimer *timer) { ktime_t rem = __hrtimer_get_remaining(timer, true); @@ -41,11 +41,11 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) } else rem = 0; - return ktime_to_timeval(rem); + return ktime_to_timespec64(rem); } static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *const value) + struct itimerspec64 *const value) { u64 val, interval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -69,11 +69,11 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, spin_unlock_irq(&tsk->sighand->siglock); - value->it_value = ns_to_timeval(val); - value->it_interval = ns_to_timeval(interval); + value->it_value = ns_to_timespec64(val); + value->it_interval = ns_to_timespec64(interval); } -int do_getitimer(int which, struct itimerval *value) +static int do_getitimer(int which, struct itimerspec64 *value) { struct task_struct *tsk = current; @@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value) spin_lock_irq(&tsk->sighand->siglock); value->it_value = itimer_get_remtime(&tsk->signal->real_timer); value->it_interval = - ktime_to_timeval(tsk->signal->it_real_incr); + ktime_to_timespec64(tsk->signal->it_real_incr); spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: @@ -97,34 +97,59 @@ int do_getitimer(int which, struct itimerval *value) return 0; } +static int put_itimerval(struct itimerval __user *o, + const struct itimerspec64 *i) +{ + struct itimerval v; + + v.it_interval.tv_sec = i->it_interval.tv_sec; + v.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC; + v.it_value.tv_sec = i->it_value.tv_sec; + v.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC; + return copy_to_user(o, &v, sizeof(struct itimerval)) ? -EFAULT : 0; +} + + SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) { - int error = -EFAULT; - struct itimerval get_buffer; + struct itimerspec64 get_buffer; + int error = do_getitimer(which, &get_buffer); - if (value) { - error = do_getitimer(which, &get_buffer); - if (!error && - copy_to_user(value, &get_buffer, sizeof(get_buffer))) - error = -EFAULT; - } + if (!error && put_itimerval(value, &get_buffer)) + error = -EFAULT; return error; } -#ifdef CONFIG_COMPAT +#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA) +struct old_itimerval32 { + struct old_timeval32 it_interval; + struct old_timeval32 it_value; +}; + +static int put_old_itimerval32(struct old_itimerval32 __user *o, + const struct itimerspec64 *i) +{ + struct old_itimerval32 v32; + + v32.it_interval.tv_sec = i->it_interval.tv_sec; + v32.it_interval.tv_usec = i->it_interval.tv_nsec / NSEC_PER_USEC; + v32.it_value.tv_sec = i->it_value.tv_sec; + v32.it_value.tv_usec = i->it_value.tv_nsec / NSEC_PER_USEC; + return copy_to_user(o, &v32, sizeof(struct old_itimerval32)) ? -EFAULT : 0; +} + COMPAT_SYSCALL_DEFINE2(getitimer, int, which, - struct compat_itimerval __user *, it) + struct old_itimerval32 __user *, value) { - struct itimerval kit; - int error = do_getitimer(which, &kit); + struct itimerspec64 get_buffer; + int error = do_getitimer(which, &get_buffer); - if (!error && put_compat_itimerval(it, &kit)) + if (!error && put_old_itimerval32(value, &get_buffer)) error = -EFAULT; return error; } #endif - /* * The timer is automagically restarted, when interval != 0 */ @@ -141,8 +166,8 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) } static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - const struct itimerval *const value, - struct itimerval *const ovalue) + const struct itimerspec64 *const value, + struct itimerspec64 *const ovalue) { u64 oval, nval, ointerval, ninterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -151,8 +176,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, * Use the to_ktime conversion because that clamps the maximum * value to KTIME_MAX and avoid multiplication overflows. */ - nval = ktime_to_ns(timeval_to_ktime(value->it_value)); - ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval)); + nval = timespec64_to_ns(&value->it_value); + ninterval = timespec64_to_ns(&value->it_interval); spin_lock_irq(&tsk->sighand->siglock); @@ -171,8 +196,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, spin_unlock_irq(&tsk->sighand->siglock); if (ovalue) { - ovalue->it_value = ns_to_timeval(oval); - ovalue->it_interval = ns_to_timeval(ointerval); + ovalue->it_value = ns_to_timespec64(oval); + ovalue->it_interval = ns_to_timespec64(ointerval); } } @@ -182,19 +207,13 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, #define timeval_valid(t) \ (((t)->tv_sec >= 0) && (((unsigned long) (t)->tv_usec) < USEC_PER_SEC)) -int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) +static int do_setitimer(int which, struct itimerspec64 *value, + struct itimerspec64 *ovalue) { struct task_struct *tsk = current; struct hrtimer *timer; ktime_t expires; - /* - * Validate the timevals in value. - */ - if (!timeval_valid(&value->it_value) || - !timeval_valid(&value->it_interval)) - return -EINVAL; - switch (which) { case ITIMER_REAL: again: @@ -203,7 +222,7 @@ again: if (ovalue) { ovalue->it_value = itimer_get_remtime(timer); ovalue->it_interval - = ktime_to_timeval(tsk->signal->it_real_incr); + = ktime_to_timespec64(tsk->signal->it_real_incr); } /* We are sharing ->siglock with it_real_fn() */ if (hrtimer_try_to_cancel(timer) < 0) { @@ -211,10 +230,10 @@ again: hrtimer_cancel_wait_running(timer); goto again; } - expires = timeval_to_ktime(value->it_value); + expires = timespec64_to_ktime(value->it_value); if (expires != 0) { tsk->signal->it_real_incr = - timeval_to_ktime(value->it_interval); + timespec64_to_ktime(value->it_interval); hrtimer_start(timer, expires, HRTIMER_MODE_REL); } else tsk->signal->it_real_incr = 0; @@ -234,6 +253,17 @@ again: return 0; } +#ifdef CONFIG_SECURITY_SELINUX +void clear_itimer(void) +{ + struct itimerspec64 v = {}; + int i; + + for (i = 0; i < 3; i++) + do_setitimer(i, &v, NULL); +} +#endif + #ifdef __ARCH_WANT_SYS_ALARM /** @@ -250,15 +280,15 @@ again: */ static unsigned int alarm_setitimer(unsigned int seconds) { - struct itimerval it_new, it_old; + struct itimerspec64 it_new, it_old; #if BITS_PER_LONG < 64 if (seconds > INT_MAX) seconds = INT_MAX; #endif it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_nsec = 0; + it_new.it_interval.tv_sec = it_new.it_interval.tv_nsec = 0; do_setitimer(ITIMER_REAL, &it_new, &it_old); @@ -266,8 +296,8 @@ static unsigned int alarm_setitimer(unsigned int seconds) * We can't return 0 if we have an alarm pending ... And we'd * better return too much than too little anyway */ - if ((!it_old.it_value.tv_sec && it_old.it_value.tv_usec) || - it_old.it_value.tv_usec >= 500000) + if ((!it_old.it_value.tv_sec && it_old.it_value.tv_nsec) || + it_old.it_value.tv_nsec >= (NSEC_PER_SEC / 2)) it_old.it_value.tv_sec++; return it_old.it_value.tv_sec; @@ -284,15 +314,35 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds) #endif +static int get_itimerval(struct itimerspec64 *o, const struct itimerval __user *i) +{ + struct itimerval v; + + if (copy_from_user(&v, i, sizeof(struct itimerval))) + return -EFAULT; + + /* Validate the timevals in value. */ + if (!timeval_valid(&v.it_value) || + !timeval_valid(&v.it_interval)) + return -EINVAL; + + o->it_interval.tv_sec = v.it_interval.tv_sec; + o->it_interval.tv_nsec = v.it_interval.tv_usec * NSEC_PER_USEC; + o->it_value.tv_sec = v.it_value.tv_sec; + o->it_value.tv_nsec = v.it_value.tv_usec * NSEC_PER_USEC; + return 0; +} + SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, struct itimerval __user *, ovalue) { - struct itimerval set_buffer, get_buffer; + struct itimerspec64 set_buffer, get_buffer; int error; if (value) { - if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) - return -EFAULT; + error = get_itimerval(&set_buffer, value); + if (error) + return error; } else { memset(&set_buffer, 0, sizeof(set_buffer)); printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer." @@ -304,30 +354,53 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, if (error || !ovalue) return error; - if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer))) + if (put_itimerval(ovalue, &get_buffer)) + return -EFAULT; + return 0; +} + +#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA) +static int get_old_itimerval32(struct itimerspec64 *o, const struct old_itimerval32 __user *i) +{ + struct old_itimerval32 v32; + + if (copy_from_user(&v32, i, sizeof(struct old_itimerval32))) return -EFAULT; + + /* Validate the timevals in value. */ + if (!timeval_valid(&v32.it_value) || + !timeval_valid(&v32.it_interval)) + return -EINVAL; + + o->it_interval.tv_sec = v32.it_interval.tv_sec; + o->it_interval.tv_nsec = v32.it_interval.tv_usec * NSEC_PER_USEC; + o->it_value.tv_sec = v32.it_value.tv_sec; + o->it_value.tv_nsec = v32.it_value.tv_usec * NSEC_PER_USEC; return 0; } -#ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(setitimer, int, which, - struct compat_itimerval __user *, in, - struct compat_itimerval __user *, out) + struct old_itimerval32 __user *, value, + struct old_itimerval32 __user *, ovalue) { - struct itimerval kin, kout; + struct itimerspec64 set_buffer, get_buffer; int error; - if (in) { - if (get_compat_itimerval(&kin, in)) - return -EFAULT; + if (value) { + error = get_old_itimerval32(&set_buffer, value); + if (error) + return error; } else { - memset(&kin, 0, sizeof(kin)); + memset(&set_buffer, 0, sizeof(set_buffer)); + printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer." + " Misfeature support will be removed\n", + current->comm); } - error = do_setitimer(which, &kin, out ? &kout : NULL); - if (error || !out) + error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); + if (error || !ovalue) return error; - if (put_compat_itimerval(out, &kout)) + if (put_old_itimerval32(ovalue, &get_buffer)) return -EFAULT; return 0; } diff --git a/kernel/time/time.c b/kernel/time/time.c index 5c54ca632d08..704ccd9451b0 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -59,9 +59,9 @@ EXPORT_SYMBOL(sys_tz); * why not move it into the appropriate arch directory (for those * architectures that need it). */ -SYSCALL_DEFINE1(time, time_t __user *, tloc) +SYSCALL_DEFINE1(time, __kernel_old_time_t __user *, tloc) { - time_t i = (time_t)ktime_get_real_seconds(); + __kernel_old_time_t i = (__kernel_old_time_t)ktime_get_real_seconds(); if (tloc) { if (put_user(i,tloc)) @@ -78,7 +78,7 @@ SYSCALL_DEFINE1(time, time_t __user *, tloc) * architectures that need it). */ -SYSCALL_DEFINE1(stime, time_t __user *, tptr) +SYSCALL_DEFINE1(stime, __kernel_old_time_t __user *, tptr) { struct timespec64 tv; int err; @@ -137,7 +137,7 @@ SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr) #endif /* __ARCH_WANT_SYS_TIME32 */ #endif -SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, +SYSCALL_DEFINE2(gettimeofday, struct __kernel_old_timeval __user *, tv, struct timezone __user *, tz) { if (likely(tv != NULL)) { @@ -179,7 +179,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz return error; if (tz) { - /* Verify we're witin the +-15 hrs range */ + /* Verify we're within the +-15 hrs range */ if (tz->tz_minuteswest > 15*60 || tz->tz_minuteswest < -15*60) return -EINVAL; @@ -196,22 +196,21 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz return 0; } -SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, +SYSCALL_DEFINE2(settimeofday, struct __kernel_old_timeval __user *, tv, struct timezone __user *, tz) { struct timespec64 new_ts; - struct timeval user_tv; struct timezone new_tz; if (tv) { - if (copy_from_user(&user_tv, tv, sizeof(*tv))) + if (get_user(new_ts.tv_sec, &tv->tv_sec) || + get_user(new_ts.tv_nsec, &tv->tv_usec)) return -EFAULT; - if (!timeval_valid(&user_tv)) + if (new_ts.tv_nsec > USEC_PER_SEC || new_ts.tv_nsec < 0) return -EINVAL; - new_ts.tv_sec = user_tv.tv_sec; - new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; + new_ts.tv_nsec *= NSEC_PER_USEC; } if (tz) { if (copy_from_user(&new_tz, tz, sizeof(*tz))) @@ -245,18 +244,17 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, struct timezone __user *, tz) { struct timespec64 new_ts; - struct timeval user_tv; struct timezone new_tz; if (tv) { - if (compat_get_timeval(&user_tv, tv)) + if (get_user(new_ts.tv_sec, &tv->tv_sec) || + get_user(new_ts.tv_nsec, &tv->tv_usec)) return -EFAULT; - if (!timeval_valid(&user_tv)) + if (new_ts.tv_nsec > USEC_PER_SEC || new_ts.tv_nsec < 0) return -EINVAL; - new_ts.tv_sec = user_tv.tv_sec; - new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; + new_ts.tv_nsec *= NSEC_PER_USEC; } if (tz) { if (copy_from_user(&new_tz, tz, sizeof(*tz))) @@ -267,7 +265,7 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, } #endif -#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT) +#ifdef CONFIG_64BIT SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p) { struct __kernel_timex txc; /* Local copy of parameter */ @@ -550,18 +548,21 @@ EXPORT_SYMBOL(set_normalized_timespec64); */ struct timespec64 ns_to_timespec64(const s64 nsec) { - struct timespec64 ts; + struct timespec64 ts = { 0, 0 }; s32 rem; - if (!nsec) - return (struct timespec64) {0, 0}; - - ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); - if (unlikely(rem < 0)) { - ts.tv_sec--; - rem += NSEC_PER_SEC; + if (likely(nsec > 0)) { + ts.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + ts.tv_nsec = rem; + } else if (nsec < 0) { + /* + * With negative times, tv_sec points to the earlier + * second, and tv_nsec counts the nanoseconds since + * then, so tv_nsec is always a positive number. + */ + ts.tv_sec = -div_u64_rem(-nsec - 1, NSEC_PER_SEC, &rem) - 1; + ts.tv_nsec = NSEC_PER_SEC - rem - 1; } - ts.tv_nsec = rem; return ts; } @@ -880,10 +881,11 @@ int get_timespec64(struct timespec64 *ts, ts->tv_sec = kts.tv_sec; - /* Zero out the padding for 32 bit systems or in compat mode */ - if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall()) + /* Zero out the padding in compat mode */ + if (in_compat_syscall()) kts.tv_nsec &= 0xFFFFFFFFUL; + /* In 32-bit mode, this drops the padding */ ts->tv_nsec = kts.tv_nsec; return 0; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cdf5afa87f65..25a0fcfa7a5d 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -671,6 +671,15 @@ config HIST_TRIGGERS See Documentation/trace/histogram.rst. If in doubt, say N. +config TRACE_EVENT_INJECT + bool "Trace event injection" + depends on TRACING + help + Allow user-space to inject a specific trace event into the ring + buffer. This is mainly used for testing purpose. + + If unsure, say N. + config MMIOTRACE_TEST tristate "Test module for mmiotrace" depends on MMIOTRACE && m diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c2b2148bb1d2..0e63db62225f 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o +obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ffc91d4935ac..e5ef4ae9edb5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -739,7 +739,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig) return -EINVAL; work = this_cpu_ptr(&send_signal_work); - if (work->irq_work.flags & IRQ_WORK_BUSY) + if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) return -EBUSY; /* Add the current task, which is the target of sending signal, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 66358d66c933..4bf050fcfe3b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -11,6 +11,7 @@ #include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/irq_work.h> +#include <linux/security.h> #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kthread.h> /* for self test */ @@ -5068,6 +5069,11 @@ static __init int test_ringbuffer(void) int cpu; int ret = 0; + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Lockdown is enabled, skipping ring buffer tests\n"); + return 0; + } + pr_info("Running ring buffer tests...\n"); buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 02a23a6e5e00..23459d53d576 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1888,6 +1888,12 @@ int __init register_tracer(struct tracer *type) return -1; } + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Can not register tracer %s due to lockdown\n", + type->name); + return -EPERM; + } + mutex_lock(&trace_types_lock); tracing_selftest_running = true; @@ -8789,6 +8795,11 @@ struct dentry *tracing_init_dentry(void) { struct trace_array *tr = &global_trace; + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Tracing disabled due to lockdown\n"); + return ERR_PTR(-EPERM); + } + /* The top level trace array uses NULL as parent */ if (tr->dir) return NULL; @@ -9231,6 +9242,12 @@ __init static int tracer_alloc_buffers(void) int ring_buf_size; int ret = -ENOMEM; + + if (security_locked_down(LOCKDOWN_TRACEFS)) { + pr_warning("Tracing disabled due to lockdown\n"); + return -EPERM; + } + /* * Make sure we don't accidently add more trace options * than we have bits for. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ca7fccafbcbb..63bf60f79398 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1601,6 +1601,7 @@ extern struct list_head ftrace_events; extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; +extern const struct file_operations event_inject_fops; #ifdef CONFIG_HIST_TRIGGERS extern int register_trigger_hist_cmd(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6b3a69e9aa6a..c6de3cebc127 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2044,6 +2044,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) trace_create_file("format", 0444, file->dir, call, &ftrace_event_format_fops); +#ifdef CONFIG_TRACE_EVENT_INJECT + if (call->event.type && call->class->reg) + trace_create_file("inject", 0200, file->dir, file, + &event_inject_fops); +#endif + return 0; } diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c new file mode 100644 index 000000000000..d43710718ee5 --- /dev/null +++ b/kernel/trace/trace_events_inject.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_events_inject - trace event injection + * + * Copyright (C) 2019 Cong Wang <cwang@twitter.com> + */ + +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/rculist.h> + +#include "trace.h" + +static int +trace_inject_entry(struct trace_event_file *file, void *rec, int len) +{ + struct trace_event_buffer fbuffer; + struct ring_buffer *buffer; + int written = 0; + void *entry; + + rcu_read_lock_sched(); + buffer = file->tr->trace_buffer.buffer; + entry = trace_event_buffer_reserve(&fbuffer, file, len); + if (entry) { + memcpy(entry, rec, len); + written = len; + trace_event_buffer_commit(&fbuffer); + } + rcu_read_unlock_sched(); + + return written; +} + +static int +parse_field(char *str, struct trace_event_call *call, + struct ftrace_event_field **pf, u64 *pv) +{ + struct ftrace_event_field *field; + char *field_name; + int s, i = 0; + int len; + u64 val; + + if (!str[i]) + return 0; + /* First find the field to associate to */ + while (isspace(str[i])) + i++; + s = i; + while (isalnum(str[i]) || str[i] == '_') + i++; + len = i - s; + if (!len) + return -EINVAL; + + field_name = kmemdup_nul(str + s, len, GFP_KERNEL); + if (!field_name) + return -ENOMEM; + field = trace_find_event_field(call, field_name); + kfree(field_name); + if (!field) + return -ENOENT; + + *pf = field; + while (isspace(str[i])) + i++; + if (str[i] != '=') + return -EINVAL; + i++; + while (isspace(str[i])) + i++; + s = i; + if (isdigit(str[i]) || str[i] == '-') { + char *num, c; + int ret; + + /* Make sure the field is not a string */ + if (is_string_field(field)) + return -EINVAL; + + if (str[i] == '-') + i++; + + /* We allow 0xDEADBEEF */ + while (isalnum(str[i])) + i++; + num = str + s; + c = str[i]; + if (c != '\0' && !isspace(c)) + return -EINVAL; + str[i] = '\0'; + /* Make sure it is a value */ + if (field->is_signed) + ret = kstrtoll(num, 0, &val); + else + ret = kstrtoull(num, 0, &val); + str[i] = c; + if (ret) + return ret; + + *pv = val; + return i; + } else if (str[i] == '\'' || str[i] == '"') { + char q = str[i]; + + /* Make sure the field is OK for strings */ + if (!is_string_field(field)) + return -EINVAL; + + for (i++; str[i]; i++) { + if (str[i] == '\\' && str[i + 1]) { + i++; + continue; + } + if (str[i] == q) + break; + } + if (!str[i]) + return -EINVAL; + + /* Skip quotes */ + s++; + len = i - s; + if (len >= MAX_FILTER_STR_VAL) + return -EINVAL; + + *pv = (unsigned long)(str + s); + str[i] = 0; + /* go past the last quote */ + i++; + return i; + } + + return -EINVAL; +} + +static int trace_get_entry_size(struct trace_event_call *call) +{ + struct ftrace_event_field *field; + struct list_head *head; + int size = 0; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (field->size + field->offset > size) + size = field->size + field->offset; + } + + return size; +} + +static void *trace_alloc_entry(struct trace_event_call *call, int *size) +{ + int entry_size = trace_get_entry_size(call); + struct ftrace_event_field *field; + struct list_head *head; + void *entry = NULL; + + /* We need an extra '\0' at the end. */ + entry = kzalloc(entry_size + 1, GFP_KERNEL); + if (!entry) + return NULL; + + head = trace_get_fields(call); + list_for_each_entry(field, head, link) { + if (!is_string_field(field)) + continue; + if (field->filter_type == FILTER_STATIC_STRING) + continue; + if (field->filter_type == FILTER_DYN_STRING) { + u32 *str_item; + int str_loc = entry_size & 0xffff; + + str_item = (u32 *)(entry + field->offset); + *str_item = str_loc; /* string length is 0. */ + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = ""; + } + } + + *size = entry_size + 1; + return entry; +} + +#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED" + +/* Caller is responsible to free the *pentry. */ +static int parse_entry(char *str, struct trace_event_call *call, void **pentry) +{ + struct ftrace_event_field *field; + unsigned long irq_flags; + void *entry = NULL; + int entry_size; + u64 val; + int len; + + entry = trace_alloc_entry(call, &entry_size); + *pentry = entry; + if (!entry) + return -ENOMEM; + + local_save_flags(irq_flags); + tracing_generic_entry_update(entry, call->event.type, irq_flags, + preempt_count()); + + while ((len = parse_field(str, call, &field, &val)) > 0) { + if (is_function_field(field)) + return -EINVAL; + + if (is_string_field(field)) { + char *addr = (char *)(unsigned long) val; + + if (field->filter_type == FILTER_STATIC_STRING) { + strlcpy(entry + field->offset, addr, field->size); + } else if (field->filter_type == FILTER_DYN_STRING) { + int str_len = strlen(addr) + 1; + int str_loc = entry_size & 0xffff; + u32 *str_item; + + entry_size += str_len; + *pentry = krealloc(entry, entry_size, GFP_KERNEL); + if (!*pentry) { + kfree(entry); + return -ENOMEM; + } + entry = *pentry; + + strlcpy(entry + (entry_size - str_len), addr, str_len); + str_item = (u32 *)(entry + field->offset); + *str_item = (str_len << 16) | str_loc; + } else { + char **paddr; + + paddr = (char **)(entry + field->offset); + *paddr = INJECT_STRING; + } + } else { + switch (field->size) { + case 1: { + u8 tmp = (u8) val; + + memcpy(entry + field->offset, &tmp, 1); + break; + } + case 2: { + u16 tmp = (u16) val; + + memcpy(entry + field->offset, &tmp, 2); + break; + } + case 4: { + u32 tmp = (u32) val; + + memcpy(entry + field->offset, &tmp, 4); + break; + } + case 8: + memcpy(entry + field->offset, &val, 8); + break; + default: + return -EINVAL; + } + } + + str += len; + } + + if (len < 0) + return len; + + return entry_size; +} + +static ssize_t +event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct trace_event_call *call; + struct trace_event_file *file; + int err = -ENODEV, size; + void *entry = NULL; + char *buf; + + if (cnt >= PAGE_SIZE) + return -EINVAL; + + buf = memdup_user_nul(ubuf, cnt); + if (IS_ERR(buf)) + return PTR_ERR(buf); + strim(buf); + + mutex_lock(&event_mutex); + file = event_file_data(filp); + if (file) { + call = file->event_call; + size = parse_entry(buf, call, &entry); + if (size < 0) + err = size; + else + err = trace_inject_entry(file, entry, size); + } + mutex_unlock(&event_mutex); + + kfree(entry); + kfree(buf); + + if (err < 0) + return err; + + *ppos += err; + return cnt; +} + +static ssize_t +event_inject_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) +{ + return -EPERM; +} + +const struct file_operations event_inject_fops = { + .open = tracing_open_generic, + .read = event_inject_read, + .write = event_inject_write, +}; |